Skip to content

Commit a751c9b

Browse files
committed
clusterctl init: add flag for retrying cert-manager readiness check
This introduces a new clusterctl init flag "--retry-cert-manager-readiness-check" that enables retrying the readiness check for an already installed cert-manager; by default, that check is attempted only once before a new cert-manager installation is started. When the flag is enabled, the cert-manager readiness check is retried for the duration specified in the clusterctl config file's cert-manager.timeout entry, or for a default timeout. See: kubernetes-sigs#11960
1 parent 9dddcb7 commit a751c9b

File tree

6 files changed

+116
-25
lines changed

6 files changed

+116
-25
lines changed

cmd/clusterctl/client/client_test.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -251,7 +251,7 @@ type fakeCertManagerClient struct {
251251

252252
var _ cluster.CertManagerClient = &fakeCertManagerClient{}
253253

254-
func (p *fakeCertManagerClient) EnsureInstalled(_ context.Context) error {
254+
func (p *fakeCertManagerClient) EnsureInstalled(_ context.Context, _ bool) error {
255255
return nil
256256
}
257257

cmd/clusterctl/client/cluster/cert_manager.go

+6-5
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ type CertManagerUpgradePlan struct {
7272
type CertManagerClient interface {
7373
// EnsureInstalled makes sure cert-manager is running and its API is available.
7474
// This is required to install a new provider.
75-
EnsureInstalled(ctx context.Context) error
75+
EnsureInstalled(ctx context.Context, retryReadinessCheck bool) error
7676

7777
// EnsureLatestVersion checks the cert-manager version currently installed, and if it is
7878
// older than the version currently suggested by clusterctl, upgrades it.
@@ -155,11 +155,11 @@ func (cm *certManagerClient) certManagerNamespaceExists(ctx context.Context) (bo
155155

156156
// EnsureInstalled makes sure cert-manager is running and its API is available.
157157
// This is required to install a new provider.
158-
func (cm *certManagerClient) EnsureInstalled(ctx context.Context) error {
158+
func (cm *certManagerClient) EnsureInstalled(ctx context.Context, retryReadinessCheck bool) error {
159159
log := logf.Log
160160

161161
// Checking if a version of cert manager supporting cert-manager-test-resources.yaml is already installed and properly working.
162-
if err := cm.waitForAPIReady(ctx, false); err == nil {
162+
if err := cm.waitForAPIReady(ctx, retryReadinessCheck); err == nil {
163163
log.Info("Skipping installing cert-manager as it is already installed")
164164
return nil
165165
}
@@ -555,13 +555,14 @@ func (cm *certManagerClient) waitForAPIReady(ctx context.Context, retry bool) er
555555
return err
556556
}
557557

558+
waitTimeout := cm.getWaitTimeout()
558559
for i := range testObjs {
559560
o := testObjs[i]
560561

561562
// Create the Kubernetes object.
562563
// This is wrapped with a retry as the cert-manager API may not be available
563-
// yet, so we need to keep retrying until it is.
564-
if err := cm.pollImmediateWaiter(ctx, waitCertManagerInterval, cm.getWaitTimeout(), func(ctx context.Context) (bool, error) {
564+
// yet, so we need to keep retrying until it is.
565+
if err := cm.pollImmediateWaiter(ctx, waitCertManagerInterval, waitTimeout, func(ctx context.Context) (bool, error) {
565566
if err := cm.createObj(ctx, o); err != nil {
566567
// If retrying is disabled, return the error here.
567568
if !retry {

cmd/clusterctl/client/cluster/cert_manager_test.go

+69-4
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import (
2323
"time"
2424

2525
. "github.com/onsi/gomega"
26+
"github.com/pkg/errors"
2627
admissionregistration "k8s.io/api/admissionregistration/v1"
2728
appsv1 "k8s.io/api/apps/v1"
2829
corev1 "k8s.io/api/core/v1"
@@ -808,6 +809,67 @@ func Test_certManagerClient_EnsureLatestVersion(t *testing.T) {
808809
}
809810
}
810811

812+
func Test_certManagerClient_EnsureInstalled(t *testing.T) {
813+
tests := []struct {
814+
name string
815+
retryReadinessCheck bool
816+
expectedError error
817+
expectedCertManagerCalls int
818+
}{
819+
{
820+
name: "Retries checking for existing cert-manager",
821+
retryReadinessCheck: true,
822+
// because of current EnsureInstalled logic, where the code to check for existing cert-manager
823+
// and code to do a new installation if that check failed call the same methods/functions
824+
// (or those methods/functions aren't really mockable from unit test POV), counting the number
825+
// of calls to cm.configClient.CertManager() seems like a reasonable way to differentiate between paths
826+
// and test the retry logic
827+
expectedCertManagerCalls: 1,
828+
},
829+
{
830+
name: "Checks for existing cert-manager only once",
831+
retryReadinessCheck: false,
832+
expectedCertManagerCalls: 3,
833+
},
834+
}
835+
836+
for _, tt := range tests {
837+
t.Run(tt.name, func(t *testing.T) {
838+
g := NewWithT(t)
839+
840+
fakeConfigClient := newFakeConfig()
841+
// make the proxy's NewClient() return an error on each of the first two calls, then nil errors afterwards
842+
proxy := test.NewFakeProxy().WithNewClientErrors(
843+
errors.New("fail1"),
844+
errors.New("fail2"),
845+
)
846+
pollImmediateWaiter := func(ctx context.Context, _ time.Duration, _ time.Duration, f wait.ConditionWithContextFunc) error {
847+
// mimic non-test behavior
848+
return wait.PollUntilContextTimeout(ctx, 0, 10*time.Second, true, f)
849+
}
850+
repo := repository.NewMemoryRepository().
851+
WithPaths("root", "components.yaml").
852+
WithDefaultVersion(config.CertManagerDefaultVersion).
853+
WithFile(config.CertManagerDefaultVersion, "components.yaml", certManagerDeploymentYaml)
854+
repositoryClientFactory := func(ctx context.Context, provider config.Provider, configClient config.Client, _ ...repository.Option) (repository.Client, error) {
855+
return repository.New(ctx, provider, configClient, repository.InjectRepository(repo))
856+
}
857+
858+
cm := newCertManagerClient(fakeConfigClient, repositoryClientFactory, proxy, pollImmediateWaiter)
859+
860+
err := cm.EnsureInstalled(context.TODO(), tt.retryReadinessCheck)
861+
862+
if tt.expectedError != nil {
863+
g.Expect(err).To(HaveOccurred())
864+
g.Expect(err).To(MatchError(tt.expectedError))
865+
} else {
866+
g.Expect(err).NotTo(HaveOccurred())
867+
}
868+
g.Expect(fakeConfigClient.certManagerCalls).To(Equal(tt.expectedCertManagerCalls))
869+
})
870+
}
871+
}
872+
811873
func newFakeConfig() *fakeConfigClient {
812874
fakeReader := test.NewFakeReader()
813875

@@ -821,23 +883,26 @@ func newFakeConfig() *fakeConfigClient {
821883
type fakeConfigClient struct {
822884
fakeReader *test.FakeReader
823885
internalclient config.Client
886+
887+
certManagerCalls int
824888
}
825889

826890
var _ config.Client = &fakeConfigClient{}
827891

828-
func (f fakeConfigClient) CertManager() config.CertManagerClient {
892+
func (f *fakeConfigClient) CertManager() config.CertManagerClient {
893+
f.certManagerCalls++
829894
return f.internalclient.CertManager()
830895
}
831896

832-
func (f fakeConfigClient) Providers() config.ProvidersClient {
897+
func (f *fakeConfigClient) Providers() config.ProvidersClient {
833898
return f.internalclient.Providers()
834899
}
835900

836-
func (f fakeConfigClient) Variables() config.VariablesClient {
901+
func (f *fakeConfigClient) Variables() config.VariablesClient {
837902
return f.internalclient.Variables()
838903
}
839904

840-
func (f fakeConfigClient) ImageMeta() config.ImageMetaClient {
905+
func (f *fakeConfigClient) ImageMeta() config.ImageMetaClient {
841906
return f.internalclient.ImageMeta()
842907
}
843908

cmd/clusterctl/client/init.go

+5-1
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,10 @@ type InitOptions struct {
7676
// WaitProviderTimeout sets the timeout per provider wait installation
7777
WaitProviderTimeout time.Duration
7878

79+
// RetryCertManagerReadinessCheck instructs the init command to retry the check for cert-manager readiness
80+
// before attempting to install it.
81+
RetryCertManagerReadinessCheck bool
82+
7983
// SkipTemplateProcess allows for skipping the call to the template processor, including also variable replacement in the component YAML.
8084
// NOTE this works only if the rawYaml is a valid yaml by itself, like e.g when using envsubst/the simple processor.
8185
skipTemplateProcess bool
@@ -142,7 +146,7 @@ func (c *clusterctlClient) Init(ctx context.Context, options InitOptions) ([]Com
142146

143147
// Before installing the providers, ensure the cert-manager Webhook is in place.
144148
certManager := clusterClient.CertManager()
145-
if err := certManager.EnsureInstalled(ctx); err != nil {
149+
if err := certManager.EnsureInstalled(ctx, options.RetryCertManagerReadinessCheck); err != nil {
146150
return nil, err
147151
}
148152

cmd/clusterctl/cmd/init.go

+17-13
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ type initOptions struct {
4040
validate bool
4141
waitProviders bool
4242
waitProviderTimeout int
43+
retryCertManagerCheck bool
4344
}
4445

4546
var initOpts = &initOptions{}
@@ -117,6 +118,8 @@ func init() {
117118
"Wait timeout per provider installation in seconds. This value is ignored if --wait-providers is false")
118119
initCmd.Flags().BoolVar(&initOpts.validate, "validate", true,
119120
"If true, clusterctl will validate that the deployments will succeed on the management cluster.")
121+
initCmd.Flags().BoolVar(&initOpts.retryCertManagerCheck, "retry-cert-manager-readiness-check", false,
122+
"If true, clusterctl will retry checking for an existing cert-manager readiness using cert-manager.timeout entry value from the config file.")
120123

121124
initCmd.AddCommand(initListImagesCmd)
122125
RootCmd.AddCommand(initCmd)
@@ -131,19 +134,20 @@ func runInit() error {
131134
}
132135

133136
options := client.InitOptions{
134-
Kubeconfig: client.Kubeconfig{Path: initOpts.kubeconfig, Context: initOpts.kubeconfigContext},
135-
CoreProvider: initOpts.coreProvider,
136-
BootstrapProviders: initOpts.bootstrapProviders,
137-
ControlPlaneProviders: initOpts.controlPlaneProviders,
138-
InfrastructureProviders: initOpts.infrastructureProviders,
139-
IPAMProviders: initOpts.ipamProviders,
140-
RuntimeExtensionProviders: initOpts.runtimeExtensionProviders,
141-
AddonProviders: initOpts.addonProviders,
142-
TargetNamespace: initOpts.targetNamespace,
143-
LogUsageInstructions: true,
144-
WaitProviders: initOpts.waitProviders,
145-
WaitProviderTimeout: time.Duration(initOpts.waitProviderTimeout) * time.Second,
146-
IgnoreValidationErrors: !initOpts.validate,
137+
Kubeconfig: client.Kubeconfig{Path: initOpts.kubeconfig, Context: initOpts.kubeconfigContext},
138+
CoreProvider: initOpts.coreProvider,
139+
BootstrapProviders: initOpts.bootstrapProviders,
140+
ControlPlaneProviders: initOpts.controlPlaneProviders,
141+
InfrastructureProviders: initOpts.infrastructureProviders,
142+
IPAMProviders: initOpts.ipamProviders,
143+
RuntimeExtensionProviders: initOpts.runtimeExtensionProviders,
144+
AddonProviders: initOpts.addonProviders,
145+
TargetNamespace: initOpts.targetNamespace,
146+
LogUsageInstructions: true,
147+
WaitProviders: initOpts.waitProviders,
148+
WaitProviderTimeout: time.Duration(initOpts.waitProviderTimeout) * time.Second,
149+
IgnoreValidationErrors: !initOpts.validate,
150+
RetryCertManagerReadinessCheck: initOpts.retryCertManagerCheck,
147151
}
148152

149153
if _, err := c.Init(ctx, options); err != nil {

cmd/clusterctl/internal/test/fake_proxy.go

+18-1
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ limitations under the License.
1717
package test
1818

1919
import (
20+
"container/list"
2021
"context"
2122
"errors"
2223

@@ -46,6 +47,8 @@ type FakeProxy struct {
4647
namespace string
4748
objs []client.Object
4849
available *bool
50+
51+
newClientErrors *list.List
4952
}
5053

5154
var (
@@ -80,6 +83,12 @@ func (f *FakeProxy) GetConfig() (*rest.Config, error) {
8083
}
8184

8285
func (f *FakeProxy) NewClient(_ context.Context) (client.Client, error) {
86+
firstInserted := f.newClientErrors.Back()
87+
if firstInserted != nil {
88+
f.newClientErrors.Remove(firstInserted)
89+
return nil, firstInserted.Value.(error)
90+
}
91+
8392
if f.cs != nil {
8493
return f.cs, nil
8594
}
@@ -149,7 +158,8 @@ func (f *FakeProxy) GetResourceNames(_ context.Context, _, _ string, _ []client.
149158

150159
func NewFakeProxy() *FakeProxy {
151160
return &FakeProxy{
152-
namespace: "default",
161+
namespace: "default",
162+
newClientErrors: list.New(),
153163
}
154164
}
155165

@@ -163,6 +173,13 @@ func (f *FakeProxy) WithNamespace(n string) *FakeProxy {
163173
return f
164174
}
165175

176+
func (f *FakeProxy) WithNewClientErrors(errs ...error) *FakeProxy {
177+
for _, err := range errs {
178+
f.newClientErrors.PushFront(err)
179+
}
180+
return f
181+
}
182+
166183
// WithProviderInventory can be used as a fast track for setting up test scenarios requiring an already initialized management cluster.
167184
// NB. this method adds an item to the Provider inventory, but it doesn't install the corresponding provider; if the
168185
// test case requires the actual provider to be installed, use the fake client to install both the provider

0 commit comments

Comments
 (0)