From 88b84d7ae2fba9a81a34a1a1a21c2e9a0c9ef4ed Mon Sep 17 00:00:00 2001 From: Maxim Muzafarov Date: Mon, 18 Jul 2022 13:59:12 +0100 Subject: [PATCH 01/13] =?UTF-8?q?=E2=9C=A8=20Add=20ControlPlane=20upgrade?= =?UTF-8?q?=20featureGate?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../controllers/provisioner/provisioner.go | 2 + .../provisioner/provisioner_aliyun.go | 4 + .../provisioner/provisioner_native.go | 118 +++++++++++------- .../controllers/virtualcluster_controller.go | 37 ++++++ .../pkg/controller/secret/secret.go | 12 ++ .../pkg/syncer/constants/constants.go | 8 ++ .../pkg/syncer/util/featuregate/gate.go | 5 + 7 files changed, 140 insertions(+), 46 deletions(-) diff --git a/virtualcluster/pkg/controller/controllers/provisioner/provisioner.go b/virtualcluster/pkg/controller/controllers/provisioner/provisioner.go index b2728c22..2a875103 100644 --- a/virtualcluster/pkg/controller/controllers/provisioner/provisioner.go +++ b/virtualcluster/pkg/controller/controllers/provisioner/provisioner.go @@ -26,4 +26,6 @@ type Provisioner interface { CreateVirtualCluster(ctx context.Context, vc *tenancyv1alpha1.VirtualCluster) error DeleteVirtualCluster(ctx context.Context, vc *tenancyv1alpha1.VirtualCluster) error GetProvisioner() string + // UpgradeVirtualCluster is used to apply current clusterversion if featuregate.VirtualClusterApplyUpdate enabled + UpgradeVirtualCluster(ctx context.Context, vc *tenancyv1alpha1.VirtualCluster) error } diff --git a/virtualcluster/pkg/controller/controllers/provisioner/provisioner_aliyun.go b/virtualcluster/pkg/controller/controllers/provisioner/provisioner_aliyun.go index c786ea93..1b5bd5de 100644 --- a/virtualcluster/pkg/controller/controllers/provisioner/provisioner_aliyun.go +++ b/virtualcluster/pkg/controller/controllers/provisioner/provisioner_aliyun.go @@ -288,3 +288,7 @@ OuterLoop: func (mpa *Aliyun) GetProvisioner() string { return "aliyun" } + +func (mpa *Aliyun) UpgradeVirtualCluster(ctx context.Context, vc *tenancyv1alpha1.VirtualCluster) error { + return fmt.Errorf("not implemented") +} diff --git a/virtualcluster/pkg/controller/controllers/provisioner/provisioner_native.go b/virtualcluster/pkg/controller/controllers/provisioner/provisioner_native.go index b69a8fa1..62c8ed76 100644 --- a/virtualcluster/pkg/controller/controllers/provisioner/provisioner_native.go +++ b/virtualcluster/pkg/controller/controllers/provisioner/provisioner_native.go @@ -25,7 +25,6 @@ import ( "github.com/go-logr/logr" corev1 "k8s.io/api/core/v1" - apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/runtime" "k8s.io/client-go/util/cert" "sigs.k8s.io/controller-runtime/pkg/client" @@ -36,7 +35,9 @@ import ( vcpki "sigs.k8s.io/cluster-api-provider-nested/virtualcluster/pkg/controller/pki" "sigs.k8s.io/cluster-api-provider-nested/virtualcluster/pkg/controller/secret" kubeutil "sigs.k8s.io/cluster-api-provider-nested/virtualcluster/pkg/controller/util/kube" + "sigs.k8s.io/cluster-api-provider-nested/virtualcluster/pkg/syncer/constants" "sigs.k8s.io/cluster-api-provider-nested/virtualcluster/pkg/syncer/conversion" + "sigs.k8s.io/cluster-api-provider-nested/virtualcluster/pkg/syncer/util/featuregate" pkiutil "sigs.k8s.io/cluster-api-provider-nested/virtualcluster/pkg/util/pki" ) @@ -45,6 +46,11 @@ const ( ComponentPollPeriodSec = 2 ) +var ( + definitelyTrue = true + patchOptions = &client.PatchOptions{Force: &definitelyTrue, FieldManager: "virtualcluster/provisioner/native"} +) + type Native struct { client.Client scheme *runtime.Scheme @@ -61,60 +67,93 @@ func NewProvisionerNative(mgr manager.Manager, log logr.Logger, provisionerTimeo }, nil } +func updateLabelClusterVersionApplied(vc *tenancyv1alpha1.VirtualCluster, cv *tenancyv1alpha1.ClusterVersion) { + if featuregate.DefaultFeatureGate.Enabled(featuregate.VirtualClusterApplyUpdate) { + if vc.Labels == nil { + vc.Labels = map[string]string{} + } + vc.Labels[constants.LabelClusterVersionApplied] = cv.ObjectMeta.ResourceVersion + } +} + // CreateVirtualCluster sets up the control plane for vc on meta k8s func (mpn *Native) CreateVirtualCluster(ctx context.Context, vc *tenancyv1alpha1.VirtualCluster) error { + cv, err := mpn.fetchClusterVersion(vc) + if err != nil { + return err + } + + updateLabelClusterVersionApplied(vc, cv) + + // 1. create the root ns + _, err = kubeutil.CreateRootNS(mpn, vc) + if err != nil { + return err + } + return mpn.applyVirtualCluster(ctx, cv, vc) +} + +func (mpn *Native) fetchClusterVersion(vc *tenancyv1alpha1.VirtualCluster) (*tenancyv1alpha1.ClusterVersion, error) { cvObjectKey := client.ObjectKey{Name: vc.Spec.ClusterVersionName} cv := &tenancyv1alpha1.ClusterVersion{} if err := mpn.Get(context.Background(), cvObjectKey, cv); err != nil { err = fmt.Errorf("desired ClusterVersion %s not found", vc.Spec.ClusterVersionName) - return err + return nil, err } + return cv, nil +} - // 1. create the root ns - _, err := kubeutil.CreateRootNS(mpn, vc) +func (mpn *Native) UpgradeVirtualCluster(ctx context.Context, vc *tenancyv1alpha1.VirtualCluster) error { + cv, err := mpn.fetchClusterVersion(vc) if err != nil { return err } + if cvVersion, ok := vc.Labels[constants.LabelClusterVersionApplied]; ok && cvVersion == cv.ObjectMeta.ResourceVersion { + mpn.Log.Info("cluster is already in desired version") + return nil + } + updateLabelClusterVersionApplied(vc, cv) + return mpn.applyVirtualCluster(ctx, cv, vc) +} + +func (mpn *Native) applyVirtualCluster(ctx context.Context, cv *tenancyv1alpha1.ClusterVersion, vc *tenancyv1alpha1.VirtualCluster) error { + var err error isClusterIP := cv.Spec.APIServer.Service != nil && cv.Spec.APIServer.Service.Spec.Type == corev1.ServiceTypeClusterIP - // if ClusterIP, have to create API Server ahead of time to lay it down in the PKI + // if ClusterIP, have to update API Server ahead of time to lay it down in the PKI if isClusterIP { - mpn.Log.Info("deploying ClusterIP Service for API component", "component", cv.Spec.APIServer.Name) + mpn.Log.Info("applying ClusterIP Service for API component", "component", cv.Spec.APIServer.Name) complementAPIServerTemplate(conversion.ToClusterKey(vc), cv.Spec.APIServer) - err = mpn.Create(context.TODO(), cv.Spec.APIServer.Service) + err := mpn.Patch(ctx, cv.Spec.APIServer.Service, client.Apply, patchOptions) if err != nil { - if !apierrors.IsAlreadyExists(err) { - return err - } - mpn.Log.Info("service already exist", - "service", cv.Spec.APIServer.Service.GetName()) + mpn.Log.Error(err, "failed to update service", "service", cv.Spec.APIServer.Service.GetName()) + return err } } - // 2. create PKI - err = mpn.createPKI(vc, cv, isClusterIP) + // 2. apply PKI + err = mpn.createAndApplyPKI(ctx, vc, cv, isClusterIP) if err != nil { return err } // 3. deploy etcd - err = mpn.deployComponent(vc, cv.Spec.ETCD) + err = mpn.deployComponent(ctx, vc, cv.Spec.ETCD) if err != nil { return err } // 4. deploy apiserver - err = mpn.deployComponent(vc, cv.Spec.APIServer) + err = mpn.deployComponent(ctx, vc, cv.Spec.APIServer) if err != nil { return err } // 5. deploy controller-manager - err = mpn.deployComponent(vc, cv.Spec.ControllerManager) + err = mpn.deployComponent(ctx, vc, cv.Spec.ControllerManager) if err != nil { return err } - return nil } @@ -162,7 +201,7 @@ func complementCtrlMgrTemplate(vcns string, ctrlMgrBdl *tenancyv1alpha1.Stateful // deployComponent deploys control plane component in namespace vcName based on the given StatefulSet // and Service Bundle ssBdl -func (mpn *Native) deployComponent(vc *tenancyv1alpha1.VirtualCluster, ssBdl *tenancyv1alpha1.StatefulSetSvcBundle) error { +func (mpn *Native) deployComponent(ctx context.Context, vc *tenancyv1alpha1.VirtualCluster, ssBdl *tenancyv1alpha1.StatefulSetSvcBundle) error { mpn.Log.Info("deploying StatefulSet for control plane component", "component", ssBdl.Name) ns := conversion.ToClusterKey(vc) @@ -178,26 +217,17 @@ func (mpn *Native) deployComponent(vc *tenancyv1alpha1.VirtualCluster, ssBdl *te return fmt.Errorf("try to deploy unknown component: %s", ssBdl.Name) } - err := mpn.Create(context.TODO(), ssBdl.StatefulSet) + err := mpn.Patch(ctx, ssBdl.StatefulSet, client.Apply, patchOptions) if err != nil { - if !apierrors.IsAlreadyExists(err) { - return err - } - mpn.Log.Info("statefuleset already exist", - "statefuleset", ssBdl.StatefulSet.GetName(), - "namespace", ssBdl.StatefulSet.GetNamespace()) + return err } // skip apiserver clusterIP service creation as it is already created in CreateVirtualCluster() if ssBdl.Service != nil && !(ssBdl.Name == "apiserver" && ssBdl.Service.Spec.Type == corev1.ServiceTypeClusterIP) { mpn.Log.Info("deploying Service for control plane component", "component", ssBdl.Name) - err = mpn.Create(context.TODO(), ssBdl.Service) + err := mpn.Patch(ctx, ssBdl.Service, client.Apply, patchOptions) if err != nil { - if !apierrors.IsAlreadyExists(err) { - return err - } - mpn.Log.Info("service already exist", - "service", ssBdl.Service.GetName()) + return err } } @@ -209,9 +239,9 @@ func (mpn *Native) deployComponent(vc *tenancyv1alpha1.VirtualCluster, ssBdl *te return nil } -// createPKISecrets creates secrets to store crt/key pairs and kubeconfigs +// createOrUpdatePKISecrets creates secrets to store crt/key pairs and kubeconfigs // for control plane components of the virtual cluster -func (mpn *Native) createPKISecrets(caGroup *vcpki.ClusterCAGroup, namespace string) error { +func (mpn *Native) createOrUpdatePKISecrets(ctx context.Context, caGroup *vcpki.ClusterCAGroup, namespace string) error { // create secret for root crt/key pair rootSrt := secret.CrtKeyPairToSecret(secret.RootCASecretName, namespace, caGroup.RootCA) // create secret for apiserver crt/key pair @@ -240,25 +270,21 @@ func (mpn *Native) createPKISecrets(caGroup *vcpki.ClusterCAGroup, namespace str // create all secrets on metacluster for _, srt := range secrets { - mpn.Log.Info("creating secret", "name", + mpn.Log.Info("applying secret", "name", srt.Name, "namespace", srt.Namespace) - err := mpn.Create(context.TODO(), srt) + + err := mpn.Patch(ctx, srt, client.Apply, patchOptions) if err != nil { - if !apierrors.IsAlreadyExists(err) { - return err - } - mpn.Log.Info("Secret already exists", - "secret", srt.Name, - "namespace", srt.Namespace) + return err } } return nil } -// createPKI constructs the PKI (all crt/key pair and kubeconfig) for the +// createAndApplyPKI constructs the PKI (all crt/key pair and kubeconfig) for the // virtual clusters, and store them as secrets in the meta cluster -func (mpn *Native) createPKI(vc *tenancyv1alpha1.VirtualCluster, cv *tenancyv1alpha1.ClusterVersion, isClusterIP bool) error { +func (mpn *Native) createAndApplyPKI(ctx context.Context, vc *tenancyv1alpha1.VirtualCluster, cv *tenancyv1alpha1.ClusterVersion, isClusterIP bool) error { ns := conversion.ToClusterKey(vc) caGroup := &vcpki.ClusterCAGroup{} // create root ca, all components will share a single root ca @@ -346,7 +372,7 @@ func (mpn *Native) createPKI(vc *tenancyv1alpha1.VirtualCluster, cv *tenancyv1al caGroup.ServiceAccountPrivateKey = svcAcctCAPair // store ca and kubeconfig into secrets - genSrtsErr := mpn.createPKISecrets(caGroup, ns) + genSrtsErr := mpn.createOrUpdatePKISecrets(ctx, caGroup, ns) if genSrtsErr != nil { return genSrtsErr } @@ -354,7 +380,7 @@ func (mpn *Native) createPKI(vc *tenancyv1alpha1.VirtualCluster, cv *tenancyv1al return nil } -func (mpn *Native) DeleteVirtualCluster(ctx context.Context, vc *tenancyv1alpha1.VirtualCluster) error { +func (mpn *Native) DeleteVirtualCluster(_ context.Context, _ *tenancyv1alpha1.VirtualCluster) error { return nil } diff --git a/virtualcluster/pkg/controller/controllers/virtualcluster_controller.go b/virtualcluster/pkg/controller/controllers/virtualcluster_controller.go index 9fbfd013..b72812a4 100644 --- a/virtualcluster/pkg/controller/controllers/virtualcluster_controller.go +++ b/virtualcluster/pkg/controller/controllers/virtualcluster_controller.go @@ -25,6 +25,8 @@ import ( "github.com/go-logr/logr" apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/util/retry" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" @@ -35,6 +37,8 @@ import ( "sigs.k8s.io/cluster-api-provider-nested/virtualcluster/pkg/controller/controllers/provisioner" kubeutil "sigs.k8s.io/cluster-api-provider-nested/virtualcluster/pkg/controller/util/kube" strutil "sigs.k8s.io/cluster-api-provider-nested/virtualcluster/pkg/controller/util/strings" + "sigs.k8s.io/cluster-api-provider-nested/virtualcluster/pkg/syncer/constants" + "sigs.k8s.io/cluster-api-provider-nested/virtualcluster/pkg/syncer/util/featuregate" ) // GetProvisioner returns a new provisioner.Provisioner by ProvisionerName @@ -163,6 +167,39 @@ func (r *ReconcileVirtualCluster) Reconcile(ctx context.Context, request reconci return case tenancyv1alpha1.ClusterRunning: r.Log.Info("VirtualCluster is running", "vc", vc.GetName()) + if !featuregate.DefaultFeatureGate.Enabled(featuregate.VirtualClusterApplyUpdate) { + return + } + if isReady, ok := vc.Labels[constants.LabelVCReadyForUpgrade]; !ok || isReady != "true" { + return + } + r.Log.Info("VirtualCluster is ready for upgrade", "vc", vc.GetName()) + err = r.Provisioner.UpgradeVirtualCluster(ctx, vc) + if err != nil { + r.Log.Error(err, "fail to upgrade virtualcluster", "vc", vc.GetName()) + kubeutil.SetVCStatus(vc, tenancyv1alpha1.ClusterRunning, fmt.Sprintf("fail to upgrade: %s", err), "TenantControlPlaneUpgradeFailed") + } else { + r.Log.Info("upgrade finished", "vc", vc.GetName()) + kubeutil.SetVCStatus(vc, tenancyv1alpha1.ClusterRunning, "tenant control plane is upgraded", "TenantControlPlaneUpgradeCompleted") + } + + err = retry.RetryOnConflict(retry.DefaultRetry, func() error { + vcStatus := vc.Status + delete(vc.Labels, constants.LabelVCReadyForUpgrade) + vcLabels := vc.Labels + updateErr := r.Update(ctx, vc) + if updateErr != nil { + if err := r.Get(ctx, types.NamespacedName{ + Namespace: vc.GetNamespace(), + Name: vc.GetName(), + }, vc); err != nil { + r.Log.Info("fail to get obj on update failure", "object", vc.GetName(), "error", err.Error()) + } + vc.Status = vcStatus + vc.Labels = vcLabels + } + return updateErr + }) return case tenancyv1alpha1.ClusterError: r.Log.Info("fail to create virtualcluster", "vc", vc.GetName()) diff --git a/virtualcluster/pkg/controller/secret/secret.go b/virtualcluster/pkg/controller/secret/secret.go index 6651fca3..301de16f 100644 --- a/virtualcluster/pkg/controller/secret/secret.go +++ b/virtualcluster/pkg/controller/secret/secret.go @@ -50,6 +50,10 @@ func RsaKeyToSecret(name, namespace string, rsaKey *rsa.PrivateKey) (*corev1.Sec return nil, err } return &corev1.Secret{ + TypeMeta: metav1.TypeMeta{ + Kind: "Secret", + APIVersion: corev1.SchemeGroupVersion.String(), + }, ObjectMeta: metav1.ObjectMeta{ Name: name, Namespace: namespace, @@ -65,6 +69,10 @@ func RsaKeyToSecret(name, namespace string, rsaKey *rsa.PrivateKey) (*corev1.Sec // CrtKeyPairToSecret encapsulates ca/key pair ckp into a secret object func CrtKeyPairToSecret(name, namespace string, ckp *vcpki.CrtKeyPair) *corev1.Secret { return &corev1.Secret{ + TypeMeta: metav1.TypeMeta{ + Kind: "Secret", + APIVersion: corev1.SchemeGroupVersion.String(), + }, ObjectMeta: metav1.ObjectMeta{ Name: name, Namespace: namespace, @@ -80,6 +88,10 @@ func CrtKeyPairToSecret(name, namespace string, ckp *vcpki.CrtKeyPair) *corev1.S // KubeconfigToSecret encapsulates kubeconfig cfgContent into a secret object func KubeconfigToSecret(name, namespace string, cfgContent string) *corev1.Secret { return &corev1.Secret{ + TypeMeta: metav1.TypeMeta{ + Kind: "Secret", + APIVersion: corev1.SchemeGroupVersion.String(), + }, ObjectMeta: metav1.ObjectMeta{ Name: name, Namespace: namespace, diff --git a/virtualcluster/pkg/syncer/constants/constants.go b/virtualcluster/pkg/syncer/constants/constants.go index b0de8612..bc077ea0 100644 --- a/virtualcluster/pkg/syncer/constants/constants.go +++ b/virtualcluster/pkg/syncer/constants/constants.go @@ -50,6 +50,14 @@ const ( // LabelVCRootNS means the namespace is the rootns created by vc-manager. LabelVCRootNS = "tenancy.x-k8s.io/vcrootns" + // LabelVCReadyForUpgrade is set to "true" when the cluster is ready for the upgrade being applied + // (use featuregate.VirtualClusterApplyUpdate to enable it in the provisioner) + LabelVCReadyForUpgrade = "tenancy.x-k8s.io/ready-for-upgrade" + + // LabelClusterVersionApplied should be set equal to the ClusterVersion.metadata.resourceVersion value + // This label is used in featuregate.VirtualClusterApplyUpdate to compare if the update must be applied. + LabelClusterVersionApplied = "tenancy.x-k8s.io/cluster-version-applied" + // LabelExternalApiserverDomain is the domain name for apiserver url from outside the cluster LabelExternalApiserverDomain = "tenancy.x-k8s.io/external-apiserver-domain" diff --git a/virtualcluster/pkg/syncer/util/featuregate/gate.go b/virtualcluster/pkg/syncer/util/featuregate/gate.go index 5a0f17bd..5c7c318e 100644 --- a/virtualcluster/pkg/syncer/util/featuregate/gate.go +++ b/virtualcluster/pkg/syncer/util/featuregate/gate.go @@ -64,6 +64,10 @@ const ( // vn-agent to run as a daemonset but run without hostNetworking and // accessed by the PodIP on each pod on the node VNodeProviderPodIP = "VNodeProviderPodIP" + + // VirtualClusterApplyUpdate is an experimental feature that allows the cluster provisioner + // to apply ClusterVersion updates if VirtualCluster object is requested it + VirtualClusterApplyUpdate = "VirtualClusterApplyUpdate" ) var defaultFeatures = FeatureList{ @@ -74,6 +78,7 @@ var defaultFeatures = FeatureList{ VNodeProviderService: {Default: false}, TenantAllowDNSPolicy: {Default: false}, VNodeProviderPodIP: {Default: false}, + VirtualClusterApplyUpdate: {Default: false}, } type Feature string From f58abed5eb83d3be670e8c63f751674ed180db79 Mon Sep 17 00:00:00 2001 From: Maxim Muzafarov Date: Mon, 18 Jul 2022 17:43:37 +0100 Subject: [PATCH 02/13] =?UTF-8?q?=E2=9C=A8=20Add=20ControlPlane=20upgrade?= =?UTF-8?q?=20testCase?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../clusterversion_controller_test.go | 7 +- .../pkg/controller/controllers/suite_test.go | 3 + .../virtualcluster_controller_test.go | 74 ++++++++++++++++++- 3 files changed, 77 insertions(+), 7 deletions(-) diff --git a/virtualcluster/pkg/controller/controllers/clusterversion_controller_test.go b/virtualcluster/pkg/controller/controllers/clusterversion_controller_test.go index 42244fd5..a8d008c7 100644 --- a/virtualcluster/pkg/controller/controllers/clusterversion_controller_test.go +++ b/virtualcluster/pkg/controller/controllers/clusterversion_controller_test.go @@ -95,7 +95,7 @@ var defaultClusterVersion = &v1alpha1.ClusterVersionSpec{ }, }, UpdateStrategy: appsv1.StatefulSetUpdateStrategy{ - Type: appsv1.OnDeleteStatefulSetStrategyType, + Type: appsv1.RollingUpdateStatefulSetStrategyType, }, Template: corev1.PodTemplateSpec{ ObjectMeta: metav1.ObjectMeta{ @@ -247,7 +247,7 @@ var defaultClusterVersion = &v1alpha1.ClusterVersionSpec{ }, }, UpdateStrategy: appsv1.StatefulSetUpdateStrategy{ - Type: appsv1.OnDeleteStatefulSetStrategyType, + Type: appsv1.RollingUpdateStatefulSetStrategyType, }, Template: corev1.PodTemplateSpec{ ObjectMeta: metav1.ObjectMeta{ @@ -302,6 +302,7 @@ var defaultClusterVersion = &v1alpha1.ClusterVersionSpec{ { ContainerPort: 6443, Name: "api", + Protocol: corev1.ProtocolTCP, // Must be explicit in 1.19 }, }, LivenessProbe: &corev1.Probe{ @@ -453,7 +454,7 @@ var defaultClusterVersion = &v1alpha1.ClusterVersionSpec{ }, }, UpdateStrategy: appsv1.StatefulSetUpdateStrategy{ - Type: appsv1.OnDeleteStatefulSetStrategyType, + Type: appsv1.RollingUpdateStatefulSetStrategyType, }, Template: corev1.PodTemplateSpec{ ObjectMeta: metav1.ObjectMeta{ diff --git a/virtualcluster/pkg/controller/controllers/suite_test.go b/virtualcluster/pkg/controller/controllers/suite_test.go index 62e7923d..cfb03d5a 100644 --- a/virtualcluster/pkg/controller/controllers/suite_test.go +++ b/virtualcluster/pkg/controller/controllers/suite_test.go @@ -34,6 +34,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/log/zap" "sigs.k8s.io/cluster-api-provider-nested/virtualcluster/pkg/apis" + "sigs.k8s.io/cluster-api-provider-nested/virtualcluster/pkg/syncer/util/featuregate" // +kubebuilder:scaffold:imports ) @@ -72,6 +73,8 @@ var _ = BeforeSuite(func() { err = apis.AddToScheme(scheme.Scheme) Expect(err).NotTo(HaveOccurred()) + Expect(featuregate.DefaultFeatureGate.Set(featuregate.VirtualClusterApplyUpdate, true)).NotTo(HaveOccurred()) + // +kubebuilder:scaffold:scheme mgr, err := ctrl.NewManager(cfg, ctrl.Options{ diff --git a/virtualcluster/pkg/controller/controllers/virtualcluster_controller_test.go b/virtualcluster/pkg/controller/controllers/virtualcluster_controller_test.go index e7961367..a5e7b558 100644 --- a/virtualcluster/pkg/controller/controllers/virtualcluster_controller_test.go +++ b/virtualcluster/pkg/controller/controllers/virtualcluster_controller_test.go @@ -18,10 +18,10 @@ package controllers import ( "context" + "time" . "github.com/onsi/ginkgo" . "github.com/onsi/gomega" - appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -32,6 +32,7 @@ import ( tenancyv1alpha1 "sigs.k8s.io/cluster-api-provider-nested/virtualcluster/pkg/apis/tenancy/v1alpha1" "sigs.k8s.io/cluster-api-provider-nested/virtualcluster/pkg/controller/secret" + "sigs.k8s.io/cluster-api-provider-nested/virtualcluster/pkg/syncer/constants" ) func getClusterObjectKey(instance *tenancyv1alpha1.VirtualCluster, name string) client.ObjectKey { @@ -41,11 +42,14 @@ func getClusterObjectKey(instance *tenancyv1alpha1.VirtualCluster, name string) var _ = Describe("VirtualCluster Controller", func() { Context("Reconcile VirtualCluster Cluster", func() { + var cvInstance *tenancyv1alpha1.ClusterVersion + var instance *tenancyv1alpha1.VirtualCluster + It("Should create resources successfully", func() { ctx := context.TODO() Expect(cli).ShouldNot(BeNil()) - cvInstance := createClusterVersion() + cvInstance = createClusterVersion() Expect(cli.Create(ctx, cvInstance)).Should(Succeed()) By("Fetching ClusterVersion") @@ -55,7 +59,7 @@ var _ = Describe("VirtualCluster Controller", func() { return !apierrors.IsNotFound(err) }, timeout, interval).Should(BeTrue()) - instance := &tenancyv1alpha1.VirtualCluster{ + instance = &tenancyv1alpha1.VirtualCluster{ ObjectMeta: metav1.ObjectMeta{ GenerateName: "virtualcluster-sample", Namespace: "default", @@ -161,9 +165,71 @@ var _ = Describe("VirtualCluster Controller", func() { By("Faking controller-manager STS Status Updates") err = cli.Status().Update(ctx, cmSts) Expect(err).To(BeNil()) + }) + It("Should upgrade resources successfully", func() { + ctx := context.TODO() + Expect(cli).ShouldNot(BeNil()) + + objectKey := client.ObjectKeyFromObject(instance) + + By("Checking cluster phase") + Eventually(func() bool { + err := cli.Get(ctx, objectKey, instance) + return err == nil && instance.Status.Phase == tenancyv1alpha1.ClusterRunning + }, time.Minute*5, interval).Should(BeTrue()) + + By("Updating ClusterVersion") + cvInstance.Spec.ETCD.StatefulSet.Spec.Template.Spec.Containers[0].Args = append([]string{"-debug"}, cvInstance.Spec.ETCD.StatefulSet.Spec.Template.Spec.Containers[0].Args...) + cvInstance.Spec.APIServer.StatefulSet.Spec.Template.Spec.Containers[0].Args = append([]string{"-v=7"}, cvInstance.Spec.APIServer.StatefulSet.Spec.Template.Spec.Containers[0].Args...) + if cvInstance.Spec.APIServer.Service.Labels == nil { + cvInstance.Spec.APIServer.Service.Labels = map[string]string{} + } + cvInstance.Spec.APIServer.Service.Labels["test-label"] = "test" + cvInstance.Spec.ControllerManager.StatefulSet.Spec.Template.Spec.Containers[0].Args = append([]string{"-v=7"}, cvInstance.Spec.ControllerManager.StatefulSet.Spec.Template.Spec.Containers[0].Args...) + cvInstance.ObjectMeta.ManagedFields = nil + forceTrue := true + Expect(cli.Patch(ctx, cvInstance, client.Apply, &client.PatchOptions{Force: &forceTrue, FieldManager: "test"})).Should(Succeed()) + + By("Enable upgrade for cluster") + instance.Labels[constants.LabelVCReadyForUpgrade] = "true" + instance.ObjectMeta.ManagedFields = nil + Expect(cli.Patch(ctx, instance, client.Apply, &client.PatchOptions{Force: &forceTrue, FieldManager: "test"})).Should(Succeed()) + + By("APIServer Service upgraded") + Eventually(func() bool { + svcObjectKey := getClusterObjectKey(instance, "apiserver-svc") + svc := &corev1.Service{} + err := cli.Get(ctx, svcObjectKey, svc) + return !apierrors.IsNotFound(err) && svc.Labels["test-label"] == "test" + }, time.Minute*2, interval).Should(BeTrue()) + etcdSts := &appsv1.StatefulSet{} + By("Control Plane etcd StatefulSet upgraded") + Eventually(func() bool { + stsObjectKey := getClusterObjectKey(instance, "etcd") + err := cli.Get(ctx, stsObjectKey, etcdSts) + return !apierrors.IsNotFound(err) && etcdSts.Spec.Template.Spec.Containers[0].Args[0] == "-debug" + }, time.Minute*2, interval).Should(BeTrue()) + + apiserverSts := &appsv1.StatefulSet{} + By("Control Plane apiserver StatefulSet upgraded") + Eventually(func() bool { + stsObjectKey := getClusterObjectKey(instance, "apiserver") + err := cli.Get(ctx, stsObjectKey, apiserverSts) + return !apierrors.IsNotFound(err) && apiserverSts.Spec.Template.Spec.Containers[0].Args[0] == "-v=7" + }, time.Minute*2, interval).Should(BeTrue()) + + cmSts := &appsv1.StatefulSet{} + By("Control Plane controller-manager StatefulSet upgraded") + Eventually(func() bool { + stsObjectKey := getClusterObjectKey(instance, "controller-manager") + err := cli.Get(ctx, stsObjectKey, cmSts) + return !apierrors.IsNotFound(err) && cmSts.Spec.Template.Spec.Containers[0].Args[0] == "-v=7" + }, time.Minute*2, interval).Should(BeTrue()) + }) + It("Should delete cluster", func() { By("Deleting VirtualCluster") - Expect(cli.Delete(ctx, instance)).To(BeNil()) + Expect(cli.Delete(context.TODO(), instance)).To(BeNil()) }) }) }) From 6697b8229acd771d76593fe2ef8c5c6823e49ce4 Mon Sep 17 00:00:00 2001 From: Maxim Muzafarov Date: Tue, 19 Jul 2022 16:54:25 +0100 Subject: [PATCH 03/13] Reuse rootCA cert if the secret is present --- .../provisioner/provisioner_native.go | 62 +++++++++++++------ virtualcluster/pkg/controller/pki/pki.go | 8 +++ virtualcluster/pkg/util/pki/util.go | 9 +++ 3 files changed, 61 insertions(+), 18 deletions(-) diff --git a/virtualcluster/pkg/controller/controllers/provisioner/provisioner_native.go b/virtualcluster/pkg/controller/controllers/provisioner/provisioner_native.go index 62c8ed76..e3a8d509 100644 --- a/virtualcluster/pkg/controller/controllers/provisioner/provisioner_native.go +++ b/virtualcluster/pkg/controller/controllers/provisioner/provisioner_native.go @@ -287,26 +287,52 @@ func (mpn *Native) createOrUpdatePKISecrets(ctx context.Context, caGroup *vcpki. func (mpn *Native) createAndApplyPKI(ctx context.Context, vc *tenancyv1alpha1.VirtualCluster, cv *tenancyv1alpha1.ClusterVersion, isClusterIP bool) error { ns := conversion.ToClusterKey(vc) caGroup := &vcpki.ClusterCAGroup{} - // create root ca, all components will share a single root ca - rootCACrt, rootKey, rootCAErr := pkiutil.NewCertificateAuthority( - &pkiutil.CertConfig{ - Config: cert.Config{ - CommonName: "kubernetes", - Organization: []string{"kubernetes-sig.kubernetes-sigs/multi-tenancy.virtualcluster"}, - }, - }) - if rootCAErr != nil { - return rootCAErr - } - rootRsaKey, ok := rootKey.(*rsa.PrivateKey) - if !ok { - return errors.New("fail to assert rsa PrivateKey") - } + var rootCAPair *vcpki.CrtKeyPair + + // reuse rootCa if it is present + rootCaSecret := &corev1.Secret{} + err := mpn.Get(ctx, client.ObjectKey{Name: secret.RootCASecretName, Namespace: vc.Status.ClusterNamespace}, rootCaSecret) + if err == nil { + rootCACrt, rootCAErr := pkiutil.DecodeCertPEM(rootCaSecret.Data[corev1.TLSCertKey]) + if rootCAErr != nil { + return rootCAErr + } + + rootCAKey, rootCAErr := vcpki.DecodePrivateKeyPEM(rootCaSecret.Data[corev1.TLSPrivateKeyKey]) + if rootCAErr != nil { + return rootCAErr + } - rootCAPair := &vcpki.CrtKeyPair{ - Crt: rootCACrt, - Key: rootRsaKey, + rootCAPair = &vcpki.CrtKeyPair{ + Crt: rootCACrt, + Key: rootCAKey, + } + mpn.Log.Info("rootCA pair is reused from the secret") + } else { + mpn.Log.Error(err, "fail to get rootCA secret") + // create root ca, all components will share a single root ca + rootCACrt, rootKey, rootCAErr := pkiutil.NewCertificateAuthority( + &pkiutil.CertConfig{ + Config: cert.Config{ + CommonName: "kubernetes", + Organization: []string{"kubernetes-sig.kubernetes-sigs/multi-tenancy.virtualcluster"}, + }, + }) + if rootCAErr != nil { + return rootCAErr + } + + rootRsaKey, ok := rootKey.(*rsa.PrivateKey) + if !ok { + return errors.New("fail to assert rsa PrivateKey") + } + + rootCAPair = &vcpki.CrtKeyPair{ + Crt: rootCACrt, + Key: rootRsaKey, + } + mpn.Log.Info("rootCA pair generated") } caGroup.RootCA = rootCAPair diff --git a/virtualcluster/pkg/controller/pki/pki.go b/virtualcluster/pkg/controller/pki/pki.go index c2e6fa86..a0310b25 100644 --- a/virtualcluster/pkg/controller/pki/pki.go +++ b/virtualcluster/pkg/controller/pki/pki.go @@ -242,6 +242,14 @@ func EncodePrivateKeyPEM(key *rsa.PrivateKey) []byte { return pem.EncodeToMemory(&block) } +func DecodePrivateKeyPEM(raw []byte) (*rsa.PrivateKey, error) { + block, _ := pem.Decode(raw) + if block == nil { + return nil, fmt.Errorf("failed to decode private key") + } + return x509.ParsePKCS1PrivateKey(block.Bytes) +} + // newPrivateKey creates an RSA private key func newPrivateKey() (*rsa.PrivateKey, error) { return rsa.GenerateKey(cryptorand.Reader, 2048) diff --git a/virtualcluster/pkg/util/pki/util.go b/virtualcluster/pkg/util/pki/util.go index be6214d8..3974d52e 100644 --- a/virtualcluster/pkg/util/pki/util.go +++ b/virtualcluster/pkg/util/pki/util.go @@ -25,6 +25,7 @@ import ( "crypto/x509" "crypto/x509/pkix" "encoding/pem" + "fmt" "math" "math/big" "time" @@ -135,6 +136,14 @@ func EncodeCertPEM(cert *x509.Certificate) []byte { return pem.EncodeToMemory(&block) } +func DecodeCertPEM(raw []byte) (*x509.Certificate, error) { + block, _ := pem.Decode(raw) + if block == nil { + return nil, fmt.Errorf("failed to decode certificate") + } + return x509.ParseCertificate(block.Bytes) +} + // EncodePublicKeyPEM returns PEM-encoded public data func EncodePublicKeyPEM(key crypto.PublicKey) ([]byte, error) { der, err := x509.MarshalPKIXPublicKey(key) From 133150c0d886cca84e1b2eb96561aea2fdcc14fc Mon Sep 17 00:00:00 2001 From: Maxim Muzafarov Date: Wed, 20 Jul 2022 10:09:14 +0100 Subject: [PATCH 04/13] Rename featureGate to ClusterVersionApplyCurrentState --- .../provisioner/provisioner_native.go | 2 +- .../pkg/controller/controllers/suite_test.go | 2 +- .../controllers/virtualcluster_controller.go | 2 +- .../pkg/syncer/util/featuregate/gate.go | 20 +++++++++---------- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/virtualcluster/pkg/controller/controllers/provisioner/provisioner_native.go b/virtualcluster/pkg/controller/controllers/provisioner/provisioner_native.go index e3a8d509..b46b30b4 100644 --- a/virtualcluster/pkg/controller/controllers/provisioner/provisioner_native.go +++ b/virtualcluster/pkg/controller/controllers/provisioner/provisioner_native.go @@ -68,7 +68,7 @@ func NewProvisionerNative(mgr manager.Manager, log logr.Logger, provisionerTimeo } func updateLabelClusterVersionApplied(vc *tenancyv1alpha1.VirtualCluster, cv *tenancyv1alpha1.ClusterVersion) { - if featuregate.DefaultFeatureGate.Enabled(featuregate.VirtualClusterApplyUpdate) { + if featuregate.DefaultFeatureGate.Enabled(featuregate.ClusterVersionApplyCurrentState) { if vc.Labels == nil { vc.Labels = map[string]string{} } diff --git a/virtualcluster/pkg/controller/controllers/suite_test.go b/virtualcluster/pkg/controller/controllers/suite_test.go index cfb03d5a..f857695b 100644 --- a/virtualcluster/pkg/controller/controllers/suite_test.go +++ b/virtualcluster/pkg/controller/controllers/suite_test.go @@ -73,7 +73,7 @@ var _ = BeforeSuite(func() { err = apis.AddToScheme(scheme.Scheme) Expect(err).NotTo(HaveOccurred()) - Expect(featuregate.DefaultFeatureGate.Set(featuregate.VirtualClusterApplyUpdate, true)).NotTo(HaveOccurred()) + Expect(featuregate.DefaultFeatureGate.Set(featuregate.ClusterVersionApplyCurrentState, true)).NotTo(HaveOccurred()) // +kubebuilder:scaffold:scheme diff --git a/virtualcluster/pkg/controller/controllers/virtualcluster_controller.go b/virtualcluster/pkg/controller/controllers/virtualcluster_controller.go index b72812a4..8dcecf62 100644 --- a/virtualcluster/pkg/controller/controllers/virtualcluster_controller.go +++ b/virtualcluster/pkg/controller/controllers/virtualcluster_controller.go @@ -167,7 +167,7 @@ func (r *ReconcileVirtualCluster) Reconcile(ctx context.Context, request reconci return case tenancyv1alpha1.ClusterRunning: r.Log.Info("VirtualCluster is running", "vc", vc.GetName()) - if !featuregate.DefaultFeatureGate.Enabled(featuregate.VirtualClusterApplyUpdate) { + if !featuregate.DefaultFeatureGate.Enabled(featuregate.ClusterVersionApplyCurrentState) { return } if isReady, ok := vc.Labels[constants.LabelVCReadyForUpgrade]; !ok || isReady != "true" { diff --git a/virtualcluster/pkg/syncer/util/featuregate/gate.go b/virtualcluster/pkg/syncer/util/featuregate/gate.go index 5c7c318e..88ecd54b 100644 --- a/virtualcluster/pkg/syncer/util/featuregate/gate.go +++ b/virtualcluster/pkg/syncer/util/featuregate/gate.go @@ -65,20 +65,20 @@ const ( // accessed by the PodIP on each pod on the node VNodeProviderPodIP = "VNodeProviderPodIP" - // VirtualClusterApplyUpdate is an experimental feature that allows the cluster provisioner + // ClusterVersionApplyCurrentState is an experimental feature that allows the cluster provisioner // to apply ClusterVersion updates if VirtualCluster object is requested it - VirtualClusterApplyUpdate = "VirtualClusterApplyUpdate" + ClusterVersionApplyCurrentState = "ClusterVersionApplyCurrentState" ) var defaultFeatures = FeatureList{ - SuperClusterPooling: {Default: false}, - SuperClusterServiceNetwork: {Default: false}, - SuperClusterLabelling: {Default: false}, - SuperClusterLabelFilter: {Default: false}, - VNodeProviderService: {Default: false}, - TenantAllowDNSPolicy: {Default: false}, - VNodeProviderPodIP: {Default: false}, - VirtualClusterApplyUpdate: {Default: false}, + SuperClusterPooling: {Default: false}, + SuperClusterServiceNetwork: {Default: false}, + SuperClusterLabelling: {Default: false}, + SuperClusterLabelFilter: {Default: false}, + VNodeProviderService: {Default: false}, + TenantAllowDNSPolicy: {Default: false}, + VNodeProviderPodIP: {Default: false}, + ClusterVersionApplyCurrentState: {Default: false}, } type Feature string From 08b363f4520cd59e472e6b10ee9f92f8ae9bfb0b Mon Sep 17 00:00:00 2001 From: Maxim Muzafarov Date: Wed, 20 Jul 2022 11:09:05 +0100 Subject: [PATCH 05/13] Add secrets annotations to StatefulSetSpecs --- .../provisioner/provisioner_native.go | 83 +++++++++++++------ .../pkg/controller/secret/secret.go | 10 +++ 2 files changed, 68 insertions(+), 25 deletions(-) diff --git a/virtualcluster/pkg/controller/controllers/provisioner/provisioner_native.go b/virtualcluster/pkg/controller/controllers/provisioner/provisioner_native.go index b46b30b4..ca0e4114 100644 --- a/virtualcluster/pkg/controller/controllers/provisioner/provisioner_native.go +++ b/virtualcluster/pkg/controller/controllers/provisioner/provisioner_native.go @@ -123,7 +123,7 @@ func (mpn *Native) applyVirtualCluster(ctx context.Context, cv *tenancyv1alpha1. // if ClusterIP, have to update API Server ahead of time to lay it down in the PKI if isClusterIP { mpn.Log.Info("applying ClusterIP Service for API component", "component", cv.Spec.APIServer.Name) - complementAPIServerTemplate(conversion.ToClusterKey(vc), cv.Spec.APIServer) + complementAPIServerTemplate(conversion.ToClusterKey(vc), cv.Spec.APIServer, nil) err := mpn.Patch(ctx, cv.Spec.APIServer.Service, client.Apply, patchOptions) if err != nil { mpn.Log.Error(err, "failed to update service", "service", cv.Spec.APIServer.Service.GetName()) @@ -132,25 +132,25 @@ func (mpn *Native) applyVirtualCluster(ctx context.Context, cv *tenancyv1alpha1. } // 2. apply PKI - err = mpn.createAndApplyPKI(ctx, vc, cv, isClusterIP) + clusterCAGroup, err := mpn.createAndApplyPKI(ctx, vc, cv, isClusterIP) if err != nil { return err } // 3. deploy etcd - err = mpn.deployComponent(ctx, vc, cv.Spec.ETCD) + err = mpn.deployComponent(ctx, vc, cv.Spec.ETCD, clusterCAGroup) if err != nil { return err } // 4. deploy apiserver - err = mpn.deployComponent(ctx, vc, cv.Spec.APIServer) + err = mpn.deployComponent(ctx, vc, cv.Spec.APIServer, clusterCAGroup) if err != nil { return err } // 5. deploy controller-manager - err = mpn.deployComponent(ctx, vc, cv.Spec.ControllerManager) + err = mpn.deployComponent(ctx, vc, cv.Spec.ControllerManager, clusterCAGroup) if err != nil { return err } @@ -176,7 +176,7 @@ func genInitialClusterArgs(replicas int32, stsName, svcName string) (argsVal str // complementETCDTemplate complements the ETCD template of the specified clusterversion // based on the virtual cluster setting -func complementETCDTemplate(vcns string, etcdBdl *tenancyv1alpha1.StatefulSetSvcBundle) { +func complementETCDTemplate(vcns string, etcdBdl *tenancyv1alpha1.StatefulSetSvcBundle, clusterCAGroup *vcpki.ClusterCAGroup) { etcdBdl.StatefulSet.ObjectMeta.Namespace = vcns etcdBdl.Service.ObjectMeta.Namespace = vcns args := etcdBdl.StatefulSet.Spec.Template.Spec.Containers[0].Args @@ -184,35 +184,67 @@ func complementETCDTemplate(vcns string, etcdBdl *tenancyv1alpha1.StatefulSetSvc etcdBdl.StatefulSet.Name, etcdBdl.Service.Name) args = append(args, "--initial-cluster", icaVal) etcdBdl.StatefulSet.Spec.Template.Spec.Containers[0].Args = args + + annotations := etcdBdl.StatefulSet.Spec.Template.GetAnnotations() + if annotations == nil { + annotations = map[string]string{} + } + annotations[secret.RootCASecretName+"-hash"] = secret.GetHash(clusterCAGroup.RootCA) + annotations[secret.ETCDCASecretName+"-hash"] = secret.GetHash(clusterCAGroup.ETCD) + etcdBdl.StatefulSet.Spec.Template.SetAnnotations(annotations) } // complementAPIServerTemplate complements the apiserver template of the specified clusterversion // based on the virtual cluster setting -func complementAPIServerTemplate(vcns string, apiserverBdl *tenancyv1alpha1.StatefulSetSvcBundle) { +func complementAPIServerTemplate(vcns string, apiserverBdl *tenancyv1alpha1.StatefulSetSvcBundle, clusterCAGroup *vcpki.ClusterCAGroup) { apiserverBdl.StatefulSet.ObjectMeta.Namespace = vcns apiserverBdl.Service.ObjectMeta.Namespace = vcns + + // we use complementAPIServerTemplate for service creation before creating certs if the service isClusterIP + if clusterCAGroup == nil { + return + } + + annotations := apiserverBdl.StatefulSet.Spec.Template.GetAnnotations() + if annotations == nil { + annotations = map[string]string{} + } + annotations[secret.RootCASecretName+"-hash"] = secret.GetHash(clusterCAGroup.RootCA) + annotations[secret.APIServerCASecretName+"-hash"] = secret.GetHash(clusterCAGroup.APIServer) + annotations[secret.FrontProxyCASecretName+"-hash"] = secret.GetHash(clusterCAGroup.FrontProxy) + annotations[secret.ServiceAccountSecretName+"-hash"] = secret.GetHash(clusterCAGroup.ServiceAccountPrivateKey) + apiserverBdl.StatefulSet.Spec.Template.SetAnnotations(annotations) } // complementCtrlMgrTemplate complements the controller manager template of the specified clusterversion // based on the virtual cluster setting -func complementCtrlMgrTemplate(vcns string, ctrlMgrBdl *tenancyv1alpha1.StatefulSetSvcBundle) { +func complementCtrlMgrTemplate(vcns string, ctrlMgrBdl *tenancyv1alpha1.StatefulSetSvcBundle, clusterCAGroup *vcpki.ClusterCAGroup) { ctrlMgrBdl.StatefulSet.ObjectMeta.Namespace = vcns + annotations := ctrlMgrBdl.StatefulSet.Spec.Template.GetAnnotations() + if annotations == nil { + annotations = map[string]string{} + } + annotations[secret.RootCASecretName+"-hash"] = secret.GetHash(clusterCAGroup.RootCA) + annotations[secret.ServiceAccountSecretName+"-hash"] = secret.GetHash(clusterCAGroup.ServiceAccountPrivateKey) + annotations[secret.ControllerManagerSecretName+"-hash"] = secret.GetHash(clusterCAGroup.CtrlMgrKbCfg) + ctrlMgrBdl.StatefulSet.Spec.Template.SetAnnotations(annotations) } // deployComponent deploys control plane component in namespace vcName based on the given StatefulSet // and Service Bundle ssBdl -func (mpn *Native) deployComponent(ctx context.Context, vc *tenancyv1alpha1.VirtualCluster, ssBdl *tenancyv1alpha1.StatefulSetSvcBundle) error { +// the method also adds annotations with certificates hashes to trigger pod recreation if certificates were changed +func (mpn *Native) deployComponent(ctx context.Context, vc *tenancyv1alpha1.VirtualCluster, ssBdl *tenancyv1alpha1.StatefulSetSvcBundle, clusterCAGroup *vcpki.ClusterCAGroup) error { mpn.Log.Info("deploying StatefulSet for control plane component", "component", ssBdl.Name) ns := conversion.ToClusterKey(vc) switch ssBdl.Name { case "etcd": - complementETCDTemplate(ns, ssBdl) + complementETCDTemplate(ns, ssBdl, clusterCAGroup) case "apiserver": - complementAPIServerTemplate(ns, ssBdl) + complementAPIServerTemplate(ns, ssBdl, clusterCAGroup) case "controller-manager": - complementCtrlMgrTemplate(ns, ssBdl) + complementCtrlMgrTemplate(ns, ssBdl, clusterCAGroup) default: return fmt.Errorf("try to deploy unknown component: %s", ssBdl.Name) } @@ -284,7 +316,8 @@ func (mpn *Native) createOrUpdatePKISecrets(ctx context.Context, caGroup *vcpki. // createAndApplyPKI constructs the PKI (all crt/key pair and kubeconfig) for the // virtual clusters, and store them as secrets in the meta cluster -func (mpn *Native) createAndApplyPKI(ctx context.Context, vc *tenancyv1alpha1.VirtualCluster, cv *tenancyv1alpha1.ClusterVersion, isClusterIP bool) error { +// The method returns the current ClusterCAGroup to use it as annotations for control-plane pods for restart +func (mpn *Native) createAndApplyPKI(ctx context.Context, vc *tenancyv1alpha1.VirtualCluster, cv *tenancyv1alpha1.ClusterVersion, isClusterIP bool) (*vcpki.ClusterCAGroup, error) { ns := conversion.ToClusterKey(vc) caGroup := &vcpki.ClusterCAGroup{} @@ -296,12 +329,12 @@ func (mpn *Native) createAndApplyPKI(ctx context.Context, vc *tenancyv1alpha1.Vi if err == nil { rootCACrt, rootCAErr := pkiutil.DecodeCertPEM(rootCaSecret.Data[corev1.TLSCertKey]) if rootCAErr != nil { - return rootCAErr + return nil, rootCAErr } rootCAKey, rootCAErr := vcpki.DecodePrivateKeyPEM(rootCaSecret.Data[corev1.TLSPrivateKeyKey]) if rootCAErr != nil { - return rootCAErr + return nil, rootCAErr } rootCAPair = &vcpki.CrtKeyPair{ @@ -320,12 +353,12 @@ func (mpn *Native) createAndApplyPKI(ctx context.Context, vc *tenancyv1alpha1.Vi }, }) if rootCAErr != nil { - return rootCAErr + return nil, rootCAErr } rootRsaKey, ok := rootKey.(*rsa.PrivateKey) if !ok { - return errors.New("fail to assert rsa PrivateKey") + return nil, errors.New("fail to assert rsa PrivateKey") } rootCAPair = &vcpki.CrtKeyPair{ @@ -340,14 +373,14 @@ func (mpn *Native) createAndApplyPKI(ctx context.Context, vc *tenancyv1alpha1.Vi // create crt, key for etcd etcdCAPair, etcdCrtErr := vcpki.NewEtcdServerCertAndKey(rootCAPair, etcdDomains) if etcdCrtErr != nil { - return etcdCrtErr + return nil, etcdCrtErr } caGroup.ETCD = etcdCAPair // create crt, key for frontendproxy frontProxyCAPair, frontProxyCrtErr := vcpki.NewFrontProxyClientCertAndKey(rootCAPair) if frontProxyCrtErr != nil { - return frontProxyCrtErr + return nil, frontProxyCrtErr } caGroup.FrontProxy = frontProxyCAPair @@ -363,7 +396,7 @@ func (mpn *Native) createAndApplyPKI(ctx context.Context, vc *tenancyv1alpha1.Vi apiserverDomain := cv.GetAPIServerDomain(ns) apiserverCAPair, err := vcpki.NewAPIServerCrtAndKey(rootCAPair, vc, apiserverDomain, clusterIP) if err != nil { - return err + return nil, err } caGroup.APIServer = apiserverCAPair @@ -377,7 +410,7 @@ func (mpn *Native) createAndApplyPKI(ctx context.Context, vc *tenancyv1alpha1.Vi "system:kube-controller-manager", vc.Name, finalAPIAddress, []string{}, rootCAPair) if err != nil { - return err + return nil, err } caGroup.CtrlMgrKbCfg = ctrlmgrKbCfg @@ -386,24 +419,24 @@ func (mpn *Native) createAndApplyPKI(ctx context.Context, vc *tenancyv1alpha1.Vi "admin", vc.Name, finalAPIAddress, []string{"system:masters"}, rootCAPair) if err != nil { - return err + return nil, err } caGroup.AdminKbCfg = adminKbCfg // create rsa key for service-account svcAcctCAPair, err := vcpki.NewServiceAccountSigningKey() if err != nil { - return err + return nil, err } caGroup.ServiceAccountPrivateKey = svcAcctCAPair // store ca and kubeconfig into secrets genSrtsErr := mpn.createOrUpdatePKISecrets(ctx, caGroup, ns) if genSrtsErr != nil { - return genSrtsErr + return nil, genSrtsErr } - return nil + return caGroup, nil } func (mpn *Native) DeleteVirtualCluster(_ context.Context, _ *tenancyv1alpha1.VirtualCluster) error { diff --git a/virtualcluster/pkg/controller/secret/secret.go b/virtualcluster/pkg/controller/secret/secret.go index 301de16f..36d6a3f5 100644 --- a/virtualcluster/pkg/controller/secret/secret.go +++ b/virtualcluster/pkg/controller/secret/secret.go @@ -18,6 +18,8 @@ package secret import ( "crypto/rsa" + "crypto/sha256" + "fmt" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -43,6 +45,14 @@ const ( ServiceAccountSecretName = "serviceaccount-rsa" ) +// GetHash hashes object to sha256 for annotations +func GetHash(o interface{}) string { + h := sha256.New() + h.Write([]byte(fmt.Sprintf("%v", o))) + + return fmt.Sprintf("%x", h.Sum(nil)) +} + // RsaKeyToSecret encapsulates rsaKey into a secret object func RsaKeyToSecret(name, namespace string, rsaKey *rsa.PrivateKey) (*corev1.Secret, error) { encodedPubKey, err := pkiutil.EncodePublicKeyPEM(&rsaKey.PublicKey) From 947c6b392917e2153e4d06283365b3fe65531710 Mon Sep 17 00:00:00 2001 From: Maxim Muzafarov Date: Wed, 20 Jul 2022 11:10:21 +0100 Subject: [PATCH 06/13] Add metrics about upgrade --- .../pkg/controller/controllers/metrics.go | 32 +++++++++++++++++++ .../controllers/virtualcluster_controller.go | 3 ++ 2 files changed, 35 insertions(+) create mode 100644 virtualcluster/pkg/controller/controllers/metrics.go diff --git a/virtualcluster/pkg/controller/controllers/metrics.go b/virtualcluster/pkg/controller/controllers/metrics.go new file mode 100644 index 00000000..4ec3f4f6 --- /dev/null +++ b/virtualcluster/pkg/controller/controllers/metrics.go @@ -0,0 +1,32 @@ +package controllers + +import ( + "github.com/prometheus/client_golang/prometheus" + "sigs.k8s.io/controller-runtime/pkg/metrics" + + "sigs.k8s.io/cluster-api-provider-nested/virtualcluster/pkg/syncer/util/featuregate" +) + +var ( + clustersUpdatedCounter = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "clusters_updated", + Help: "Amount of clusters upgraded by reconciler in featuregate.ClusterVersionApplyCurrentState", + }, + []string{"cluster_version", "resource_version"}, + ) + clustersUpdateSeconds = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Name: "clusters_update_seconds", + Help: "Duration of cluster upgrade by reconciler in featuregate.ClusterVersionApplyCurrentState", + }, + []string{"cluster_version", "resource_version"}, + ) +) + +func init() { + // Expose featuregate.ClusterVersionApplyCurrentState metrics only if it enabled + if featuregate.DefaultFeatureGate.Enabled(featuregate.ClusterVersionApplyCurrentState) { + metrics.Registry.MustRegister(clustersUpdatedCounter, clustersUpdateSeconds) + } +} diff --git a/virtualcluster/pkg/controller/controllers/virtualcluster_controller.go b/virtualcluster/pkg/controller/controllers/virtualcluster_controller.go index 8dcecf62..04eaf12d 100644 --- a/virtualcluster/pkg/controller/controllers/virtualcluster_controller.go +++ b/virtualcluster/pkg/controller/controllers/virtualcluster_controller.go @@ -174,13 +174,16 @@ func (r *ReconcileVirtualCluster) Reconcile(ctx context.Context, request reconci return } r.Log.Info("VirtualCluster is ready for upgrade", "vc", vc.GetName()) + upgradeStartTimestamp := time.Now() err = r.Provisioner.UpgradeVirtualCluster(ctx, vc) + clustersUpdateSeconds.WithLabelValues(vc.Spec.ClusterVersionName, vc.Labels[constants.LabelClusterVersionApplied]).Observe(time.Since(upgradeStartTimestamp).Seconds()) if err != nil { r.Log.Error(err, "fail to upgrade virtualcluster", "vc", vc.GetName()) kubeutil.SetVCStatus(vc, tenancyv1alpha1.ClusterRunning, fmt.Sprintf("fail to upgrade: %s", err), "TenantControlPlaneUpgradeFailed") } else { r.Log.Info("upgrade finished", "vc", vc.GetName()) kubeutil.SetVCStatus(vc, tenancyv1alpha1.ClusterRunning, "tenant control plane is upgraded", "TenantControlPlaneUpgradeCompleted") + clustersUpdatedCounter.WithLabelValues(vc.Spec.ClusterVersionName, vc.Labels[constants.LabelClusterVersionApplied]).Inc() } err = retry.RetryOnConflict(retry.DefaultRetry, func() error { From 5abce28e7f71761abd520322f5f5588aac0b80cf Mon Sep 17 00:00:00 2001 From: Maxim Muzafarov Date: Wed, 20 Jul 2022 12:51:11 +0100 Subject: [PATCH 07/13] Disable etcd certificate restarts --- .../controllers/provisioner/provisioner_native.go | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/virtualcluster/pkg/controller/controllers/provisioner/provisioner_native.go b/virtualcluster/pkg/controller/controllers/provisioner/provisioner_native.go index ca0e4114..f43666c6 100644 --- a/virtualcluster/pkg/controller/controllers/provisioner/provisioner_native.go +++ b/virtualcluster/pkg/controller/controllers/provisioner/provisioner_native.go @@ -176,7 +176,8 @@ func genInitialClusterArgs(replicas int32, stsName, svcName string) (argsVal str // complementETCDTemplate complements the ETCD template of the specified clusterversion // based on the virtual cluster setting -func complementETCDTemplate(vcns string, etcdBdl *tenancyv1alpha1.StatefulSetSvcBundle, clusterCAGroup *vcpki.ClusterCAGroup) { +// etcd watches certificates and does not need watch-restart mechanism like apiserver or controller-manager +func complementETCDTemplate(vcns string, etcdBdl *tenancyv1alpha1.StatefulSetSvcBundle) { etcdBdl.StatefulSet.ObjectMeta.Namespace = vcns etcdBdl.Service.ObjectMeta.Namespace = vcns args := etcdBdl.StatefulSet.Spec.Template.Spec.Containers[0].Args @@ -184,14 +185,6 @@ func complementETCDTemplate(vcns string, etcdBdl *tenancyv1alpha1.StatefulSetSvc etcdBdl.StatefulSet.Name, etcdBdl.Service.Name) args = append(args, "--initial-cluster", icaVal) etcdBdl.StatefulSet.Spec.Template.Spec.Containers[0].Args = args - - annotations := etcdBdl.StatefulSet.Spec.Template.GetAnnotations() - if annotations == nil { - annotations = map[string]string{} - } - annotations[secret.RootCASecretName+"-hash"] = secret.GetHash(clusterCAGroup.RootCA) - annotations[secret.ETCDCASecretName+"-hash"] = secret.GetHash(clusterCAGroup.ETCD) - etcdBdl.StatefulSet.Spec.Template.SetAnnotations(annotations) } // complementAPIServerTemplate complements the apiserver template of the specified clusterversion @@ -240,7 +233,7 @@ func (mpn *Native) deployComponent(ctx context.Context, vc *tenancyv1alpha1.Virt switch ssBdl.Name { case "etcd": - complementETCDTemplate(ns, ssBdl, clusterCAGroup) + complementETCDTemplate(ns, ssBdl) case "apiserver": complementAPIServerTemplate(ns, ssBdl, clusterCAGroup) case "controller-manager": From c695b42a0ad044e2c379886e19f0215bd764caf8 Mon Sep 17 00:00:00 2001 From: Maxim Muzafarov Date: Wed, 20 Jul 2022 13:35:30 +0100 Subject: [PATCH 08/13] Register metrics in controller --- virtualcluster/pkg/controller/controllers/metrics.go | 10 ---------- .../controllers/virtualcluster_controller.go | 10 +++++++++- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/virtualcluster/pkg/controller/controllers/metrics.go b/virtualcluster/pkg/controller/controllers/metrics.go index 4ec3f4f6..3b6ebe31 100644 --- a/virtualcluster/pkg/controller/controllers/metrics.go +++ b/virtualcluster/pkg/controller/controllers/metrics.go @@ -2,9 +2,6 @@ package controllers import ( "github.com/prometheus/client_golang/prometheus" - "sigs.k8s.io/controller-runtime/pkg/metrics" - - "sigs.k8s.io/cluster-api-provider-nested/virtualcluster/pkg/syncer/util/featuregate" ) var ( @@ -23,10 +20,3 @@ var ( []string{"cluster_version", "resource_version"}, ) ) - -func init() { - // Expose featuregate.ClusterVersionApplyCurrentState metrics only if it enabled - if featuregate.DefaultFeatureGate.Enabled(featuregate.ClusterVersionApplyCurrentState) { - metrics.Registry.MustRegister(clustersUpdatedCounter, clustersUpdateSeconds) - } -} diff --git a/virtualcluster/pkg/controller/controllers/virtualcluster_controller.go b/virtualcluster/pkg/controller/controllers/virtualcluster_controller.go index 04eaf12d..6046363f 100644 --- a/virtualcluster/pkg/controller/controllers/virtualcluster_controller.go +++ b/virtualcluster/pkg/controller/controllers/virtualcluster_controller.go @@ -27,10 +27,10 @@ import ( apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/util/retry" - ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller" + "sigs.k8s.io/controller-runtime/pkg/metrics" "sigs.k8s.io/controller-runtime/pkg/reconcile" tenancyv1alpha1 "sigs.k8s.io/cluster-api-provider-nested/virtualcluster/pkg/apis/tenancy/v1alpha1" @@ -71,6 +71,14 @@ func (r *ReconcileVirtualCluster) SetupWithManager(mgr ctrl.Manager, opts contro } r.Provisioner = provisioner + // Expose featuregate.ClusterVersionApplyCurrentState metrics only if it enabled + if featuregate.DefaultFeatureGate.Enabled(featuregate.ClusterVersionApplyCurrentState) { + metrics.Registry.MustRegister( + clustersUpdatedCounter, + clustersUpdateSeconds, + ) + } + return ctrl.NewControllerManagedBy(mgr). WithOptions(opts). For(&tenancyv1alpha1.VirtualCluster{}). From 5938bf0813698a96d992a9444875497d4a644a20 Mon Sep 17 00:00:00 2001 From: Maxim Muzafarov Date: Wed, 20 Jul 2022 13:58:10 +0100 Subject: [PATCH 09/13] Use custom buckets (the cluster upgrade is slow) --- virtualcluster/pkg/controller/controllers/metrics.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/virtualcluster/pkg/controller/controllers/metrics.go b/virtualcluster/pkg/controller/controllers/metrics.go index 3b6ebe31..481e7b7a 100644 --- a/virtualcluster/pkg/controller/controllers/metrics.go +++ b/virtualcluster/pkg/controller/controllers/metrics.go @@ -14,8 +14,9 @@ var ( ) clustersUpdateSeconds = prometheus.NewHistogramVec( prometheus.HistogramOpts{ - Name: "clusters_update_seconds", - Help: "Duration of cluster upgrade by reconciler in featuregate.ClusterVersionApplyCurrentState", + Name: "clusters_update_seconds", + Help: "Duration of cluster upgrade by reconciler in featuregate.ClusterVersionApplyCurrentState", + Buckets: []float64{.1, .5, 1, 5, 10, 20, 30, 60, 90, 120, 300, 600, 900}, }, []string{"cluster_version", "resource_version"}, ) From 1a196e356b0c6e9adb3eab720891933de555b0d6 Mon Sep 17 00:00:00 2001 From: Maxim Muzafarov Date: Wed, 20 Jul 2022 18:11:16 +0100 Subject: [PATCH 10/13] Rename feature and ensure ETCD is not upgraded --- .../provisioner/provisioner_native.go | 30 ++++++++++++------- .../pkg/controller/controllers/suite_test.go | 2 +- .../controllers/virtualcluster_controller.go | 6 ++-- .../virtualcluster_controller_test.go | 9 ------ .../pkg/syncer/util/featuregate/gate.go | 20 ++++++------- 5 files changed, 33 insertions(+), 34 deletions(-) diff --git a/virtualcluster/pkg/controller/controllers/provisioner/provisioner_native.go b/virtualcluster/pkg/controller/controllers/provisioner/provisioner_native.go index f43666c6..50695580 100644 --- a/virtualcluster/pkg/controller/controllers/provisioner/provisioner_native.go +++ b/virtualcluster/pkg/controller/controllers/provisioner/provisioner_native.go @@ -68,7 +68,7 @@ func NewProvisionerNative(mgr manager.Manager, log logr.Logger, provisionerTimeo } func updateLabelClusterVersionApplied(vc *tenancyv1alpha1.VirtualCluster, cv *tenancyv1alpha1.ClusterVersion) { - if featuregate.DefaultFeatureGate.Enabled(featuregate.ClusterVersionApplyCurrentState) { + if featuregate.DefaultFeatureGate.Enabled(featuregate.ClusterVersionPartialUpgrade) { if vc.Labels == nil { vc.Labels = map[string]string{} } @@ -114,6 +114,11 @@ func (mpn *Native) UpgradeVirtualCluster(ctx context.Context, vc *tenancyv1alpha return nil } updateLabelClusterVersionApplied(vc, cv) + + // We currently do not support ETCD upgrades because of amount of manual actions required + // The easiest way to achieve it - pass empty ETCD definition to the ClusterVersion + cv.Spec.ETCD = nil + return mpn.applyVirtualCluster(ctx, cv, vc) } @@ -137,22 +142,26 @@ func (mpn *Native) applyVirtualCluster(ctx context.Context, cv *tenancyv1alpha1. return err } - // 3. deploy etcd - err = mpn.deployComponent(ctx, vc, cv.Spec.ETCD, clusterCAGroup) - if err != nil { - return err + // 3. deploy etcd if defined + if cv.Spec.ETCD != nil { + err = mpn.deployComponent(ctx, vc, cv.Spec.ETCD, clusterCAGroup) + if err != nil { + return err + } } - // 4. deploy apiserver + // 4. deploy apiserver (must be defined always) err = mpn.deployComponent(ctx, vc, cv.Spec.APIServer, clusterCAGroup) if err != nil { return err } - // 5. deploy controller-manager - err = mpn.deployComponent(ctx, vc, cv.Spec.ControllerManager, clusterCAGroup) - if err != nil { - return err + // 5. deploy controller-manager if defined + if cv.Spec.ControllerManager != nil { + err = mpn.deployComponent(ctx, vc, cv.Spec.ControllerManager, clusterCAGroup) + if err != nil { + return err + } } return nil } @@ -176,7 +185,6 @@ func genInitialClusterArgs(replicas int32, stsName, svcName string) (argsVal str // complementETCDTemplate complements the ETCD template of the specified clusterversion // based on the virtual cluster setting -// etcd watches certificates and does not need watch-restart mechanism like apiserver or controller-manager func complementETCDTemplate(vcns string, etcdBdl *tenancyv1alpha1.StatefulSetSvcBundle) { etcdBdl.StatefulSet.ObjectMeta.Namespace = vcns etcdBdl.Service.ObjectMeta.Namespace = vcns diff --git a/virtualcluster/pkg/controller/controllers/suite_test.go b/virtualcluster/pkg/controller/controllers/suite_test.go index f857695b..54e35916 100644 --- a/virtualcluster/pkg/controller/controllers/suite_test.go +++ b/virtualcluster/pkg/controller/controllers/suite_test.go @@ -73,7 +73,7 @@ var _ = BeforeSuite(func() { err = apis.AddToScheme(scheme.Scheme) Expect(err).NotTo(HaveOccurred()) - Expect(featuregate.DefaultFeatureGate.Set(featuregate.ClusterVersionApplyCurrentState, true)).NotTo(HaveOccurred()) + Expect(featuregate.DefaultFeatureGate.Set(featuregate.ClusterVersionPartialUpgrade, true)).NotTo(HaveOccurred()) // +kubebuilder:scaffold:scheme diff --git a/virtualcluster/pkg/controller/controllers/virtualcluster_controller.go b/virtualcluster/pkg/controller/controllers/virtualcluster_controller.go index 6046363f..41d8f11f 100644 --- a/virtualcluster/pkg/controller/controllers/virtualcluster_controller.go +++ b/virtualcluster/pkg/controller/controllers/virtualcluster_controller.go @@ -71,8 +71,8 @@ func (r *ReconcileVirtualCluster) SetupWithManager(mgr ctrl.Manager, opts contro } r.Provisioner = provisioner - // Expose featuregate.ClusterVersionApplyCurrentState metrics only if it enabled - if featuregate.DefaultFeatureGate.Enabled(featuregate.ClusterVersionApplyCurrentState) { + // Expose featuregate.ClusterVersionPartialUpgrade metrics only if it enabled + if featuregate.DefaultFeatureGate.Enabled(featuregate.ClusterVersionPartialUpgrade) { metrics.Registry.MustRegister( clustersUpdatedCounter, clustersUpdateSeconds, @@ -175,7 +175,7 @@ func (r *ReconcileVirtualCluster) Reconcile(ctx context.Context, request reconci return case tenancyv1alpha1.ClusterRunning: r.Log.Info("VirtualCluster is running", "vc", vc.GetName()) - if !featuregate.DefaultFeatureGate.Enabled(featuregate.ClusterVersionApplyCurrentState) { + if !featuregate.DefaultFeatureGate.Enabled(featuregate.ClusterVersionPartialUpgrade) { return } if isReady, ok := vc.Labels[constants.LabelVCReadyForUpgrade]; !ok || isReady != "true" { diff --git a/virtualcluster/pkg/controller/controllers/virtualcluster_controller_test.go b/virtualcluster/pkg/controller/controllers/virtualcluster_controller_test.go index a5e7b558..84e1f5e5 100644 --- a/virtualcluster/pkg/controller/controllers/virtualcluster_controller_test.go +++ b/virtualcluster/pkg/controller/controllers/virtualcluster_controller_test.go @@ -179,7 +179,6 @@ var _ = Describe("VirtualCluster Controller", func() { }, time.Minute*5, interval).Should(BeTrue()) By("Updating ClusterVersion") - cvInstance.Spec.ETCD.StatefulSet.Spec.Template.Spec.Containers[0].Args = append([]string{"-debug"}, cvInstance.Spec.ETCD.StatefulSet.Spec.Template.Spec.Containers[0].Args...) cvInstance.Spec.APIServer.StatefulSet.Spec.Template.Spec.Containers[0].Args = append([]string{"-v=7"}, cvInstance.Spec.APIServer.StatefulSet.Spec.Template.Spec.Containers[0].Args...) if cvInstance.Spec.APIServer.Service.Labels == nil { cvInstance.Spec.APIServer.Service.Labels = map[string]string{} @@ -203,14 +202,6 @@ var _ = Describe("VirtualCluster Controller", func() { return !apierrors.IsNotFound(err) && svc.Labels["test-label"] == "test" }, time.Minute*2, interval).Should(BeTrue()) - etcdSts := &appsv1.StatefulSet{} - By("Control Plane etcd StatefulSet upgraded") - Eventually(func() bool { - stsObjectKey := getClusterObjectKey(instance, "etcd") - err := cli.Get(ctx, stsObjectKey, etcdSts) - return !apierrors.IsNotFound(err) && etcdSts.Spec.Template.Spec.Containers[0].Args[0] == "-debug" - }, time.Minute*2, interval).Should(BeTrue()) - apiserverSts := &appsv1.StatefulSet{} By("Control Plane apiserver StatefulSet upgraded") Eventually(func() bool { diff --git a/virtualcluster/pkg/syncer/util/featuregate/gate.go b/virtualcluster/pkg/syncer/util/featuregate/gate.go index 88ecd54b..0c55428a 100644 --- a/virtualcluster/pkg/syncer/util/featuregate/gate.go +++ b/virtualcluster/pkg/syncer/util/featuregate/gate.go @@ -65,20 +65,20 @@ const ( // accessed by the PodIP on each pod on the node VNodeProviderPodIP = "VNodeProviderPodIP" - // ClusterVersionApplyCurrentState is an experimental feature that allows the cluster provisioner + // ClusterVersionPartialUpgrade is an experimental feature that allows the cluster provisioner // to apply ClusterVersion updates if VirtualCluster object is requested it - ClusterVersionApplyCurrentState = "ClusterVersionApplyCurrentState" + ClusterVersionPartialUpgrade = "ClusterVersionPartialUpgrade" ) var defaultFeatures = FeatureList{ - SuperClusterPooling: {Default: false}, - SuperClusterServiceNetwork: {Default: false}, - SuperClusterLabelling: {Default: false}, - SuperClusterLabelFilter: {Default: false}, - VNodeProviderService: {Default: false}, - TenantAllowDNSPolicy: {Default: false}, - VNodeProviderPodIP: {Default: false}, - ClusterVersionApplyCurrentState: {Default: false}, + SuperClusterPooling: {Default: false}, + SuperClusterServiceNetwork: {Default: false}, + SuperClusterLabelling: {Default: false}, + SuperClusterLabelFilter: {Default: false}, + VNodeProviderService: {Default: false}, + TenantAllowDNSPolicy: {Default: false}, + VNodeProviderPodIP: {Default: false}, + ClusterVersionPartialUpgrade: {Default: false}, } type Feature string From 5512d3094a65f5821e0d4bcec1e02fc241423c6d Mon Sep 17 00:00:00 2001 From: Maxim Muzafarov Date: Wed, 20 Jul 2022 18:11:37 +0100 Subject: [PATCH 11/13] Add clusters_upgrade_failed counter --- .../pkg/controller/controllers/metrics.go | 19 +++++++++++++------ .../controllers/virtualcluster_controller.go | 10 ++++++---- 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/virtualcluster/pkg/controller/controllers/metrics.go b/virtualcluster/pkg/controller/controllers/metrics.go index 481e7b7a..35c5905f 100644 --- a/virtualcluster/pkg/controller/controllers/metrics.go +++ b/virtualcluster/pkg/controller/controllers/metrics.go @@ -5,17 +5,24 @@ import ( ) var ( - clustersUpdatedCounter = prometheus.NewCounterVec( + clustersUpgradedCounter = prometheus.NewCounterVec( prometheus.CounterOpts{ - Name: "clusters_updated", - Help: "Amount of clusters upgraded by reconciler in featuregate.ClusterVersionApplyCurrentState", + Name: "clusters_upgraded", + Help: "Amount of clusters upgraded by reconciler in featuregate.ClusterVersionPartialUpgrade", }, []string{"cluster_version", "resource_version"}, ) - clustersUpdateSeconds = prometheus.NewHistogramVec( + clustersUpgradeFailedCounter = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "clusters_upgrade_failed", + Help: "Amount of clusters failed to upgrade by reconciler in featuregate.ClusterVersionPartialUpgrade", + }, + []string{"cluster_version", "resource_version"}, + ) + clustersUpgradeSeconds = prometheus.NewHistogramVec( prometheus.HistogramOpts{ - Name: "clusters_update_seconds", - Help: "Duration of cluster upgrade by reconciler in featuregate.ClusterVersionApplyCurrentState", + Name: "clusters_upgrade_seconds", + Help: "Duration of cluster upgrade by reconciler in featuregate.ClusterVersionPartialUpgrade", Buckets: []float64{.1, .5, 1, 5, 10, 20, 30, 60, 90, 120, 300, 600, 900}, }, []string{"cluster_version", "resource_version"}, diff --git a/virtualcluster/pkg/controller/controllers/virtualcluster_controller.go b/virtualcluster/pkg/controller/controllers/virtualcluster_controller.go index 41d8f11f..f246247c 100644 --- a/virtualcluster/pkg/controller/controllers/virtualcluster_controller.go +++ b/virtualcluster/pkg/controller/controllers/virtualcluster_controller.go @@ -74,8 +74,9 @@ func (r *ReconcileVirtualCluster) SetupWithManager(mgr ctrl.Manager, opts contro // Expose featuregate.ClusterVersionPartialUpgrade metrics only if it enabled if featuregate.DefaultFeatureGate.Enabled(featuregate.ClusterVersionPartialUpgrade) { metrics.Registry.MustRegister( - clustersUpdatedCounter, - clustersUpdateSeconds, + clustersUpgradedCounter, + clustersUpgradeFailedCounter, + clustersUpgradeSeconds, ) } @@ -184,14 +185,15 @@ func (r *ReconcileVirtualCluster) Reconcile(ctx context.Context, request reconci r.Log.Info("VirtualCluster is ready for upgrade", "vc", vc.GetName()) upgradeStartTimestamp := time.Now() err = r.Provisioner.UpgradeVirtualCluster(ctx, vc) - clustersUpdateSeconds.WithLabelValues(vc.Spec.ClusterVersionName, vc.Labels[constants.LabelClusterVersionApplied]).Observe(time.Since(upgradeStartTimestamp).Seconds()) + clustersUpgradeSeconds.WithLabelValues(vc.Spec.ClusterVersionName, vc.Labels[constants.LabelClusterVersionApplied]).Observe(time.Since(upgradeStartTimestamp).Seconds()) if err != nil { r.Log.Error(err, "fail to upgrade virtualcluster", "vc", vc.GetName()) kubeutil.SetVCStatus(vc, tenancyv1alpha1.ClusterRunning, fmt.Sprintf("fail to upgrade: %s", err), "TenantControlPlaneUpgradeFailed") + clustersUpgradeFailedCounter.WithLabelValues(vc.Spec.ClusterVersionName, vc.Labels[constants.LabelClusterVersionApplied]).Inc() } else { r.Log.Info("upgrade finished", "vc", vc.GetName()) kubeutil.SetVCStatus(vc, tenancyv1alpha1.ClusterRunning, "tenant control plane is upgraded", "TenantControlPlaneUpgradeCompleted") - clustersUpdatedCounter.WithLabelValues(vc.Spec.ClusterVersionName, vc.Labels[constants.LabelClusterVersionApplied]).Inc() + clustersUpgradedCounter.WithLabelValues(vc.Spec.ClusterVersionName, vc.Labels[constants.LabelClusterVersionApplied]).Inc() } err = retry.RetryOnConflict(retry.DefaultRetry, func() error { From 9c4f9628fccdc9d91744783697a986f65e94ecdf Mon Sep 17 00:00:00 2001 From: Maxim Muzafarov Date: Wed, 20 Jul 2022 18:26:34 +0100 Subject: [PATCH 12/13] Do not apply ETCD upgrades, check rootCA error --- .../provisioner/provisioner_native.go | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/virtualcluster/pkg/controller/controllers/provisioner/provisioner_native.go b/virtualcluster/pkg/controller/controllers/provisioner/provisioner_native.go index 50695580..3d95559d 100644 --- a/virtualcluster/pkg/controller/controllers/provisioner/provisioner_native.go +++ b/virtualcluster/pkg/controller/controllers/provisioner/provisioner_native.go @@ -25,6 +25,7 @@ import ( "github.com/go-logr/logr" corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/runtime" "k8s.io/client-go/util/cert" "sigs.k8s.io/controller-runtime/pkg/client" @@ -90,7 +91,7 @@ func (mpn *Native) CreateVirtualCluster(ctx context.Context, vc *tenancyv1alpha1 if err != nil { return err } - return mpn.applyVirtualCluster(ctx, cv, vc) + return mpn.applyVirtualCluster(ctx, cv, vc, true) } func (mpn *Native) fetchClusterVersion(vc *tenancyv1alpha1.VirtualCluster) (*tenancyv1alpha1.ClusterVersion, error) { @@ -117,12 +118,10 @@ func (mpn *Native) UpgradeVirtualCluster(ctx context.Context, vc *tenancyv1alpha // We currently do not support ETCD upgrades because of amount of manual actions required // The easiest way to achieve it - pass empty ETCD definition to the ClusterVersion - cv.Spec.ETCD = nil - - return mpn.applyVirtualCluster(ctx, cv, vc) + return mpn.applyVirtualCluster(ctx, cv, vc, false) } -func (mpn *Native) applyVirtualCluster(ctx context.Context, cv *tenancyv1alpha1.ClusterVersion, vc *tenancyv1alpha1.VirtualCluster) error { +func (mpn *Native) applyVirtualCluster(ctx context.Context, cv *tenancyv1alpha1.ClusterVersion, vc *tenancyv1alpha1.VirtualCluster, applyETCD bool) error { var err error isClusterIP := cv.Spec.APIServer.Service != nil && cv.Spec.APIServer.Service.Spec.Type == corev1.ServiceTypeClusterIP // if ClusterIP, have to update API Server ahead of time to lay it down in the PKI @@ -143,7 +142,7 @@ func (mpn *Native) applyVirtualCluster(ctx context.Context, cv *tenancyv1alpha1. } // 3. deploy etcd if defined - if cv.Spec.ETCD != nil { + if applyETCD { err = mpn.deployComponent(ctx, vc, cv.Spec.ETCD, clusterCAGroup) if err != nil { return err @@ -328,6 +327,7 @@ func (mpn *Native) createAndApplyPKI(ctx context.Context, vc *tenancyv1alpha1.Vi rootCaSecret := &corev1.Secret{} err := mpn.Get(ctx, client.ObjectKey{Name: secret.RootCASecretName, Namespace: vc.Status.ClusterNamespace}, rootCaSecret) if err == nil { + // The secret is present and we can reuse it rootCACrt, rootCAErr := pkiutil.DecodeCertPEM(rootCaSecret.Data[corev1.TLSCertKey]) if rootCAErr != nil { return nil, rootCAErr @@ -343,8 +343,8 @@ func (mpn *Native) createAndApplyPKI(ctx context.Context, vc *tenancyv1alpha1.Vi Key: rootCAKey, } mpn.Log.Info("rootCA pair is reused from the secret") - } else { - mpn.Log.Error(err, "fail to get rootCA secret") + } else if apierrors.IsNotFound(err) { + mpn.Log.Info("rootCA secret is not found. Creating") // create root ca, all components will share a single root ca rootCACrt, rootKey, rootCAErr := pkiutil.NewCertificateAuthority( &pkiutil.CertConfig{ @@ -367,6 +367,9 @@ func (mpn *Native) createAndApplyPKI(ctx context.Context, vc *tenancyv1alpha1.Vi Key: rootRsaKey, } mpn.Log.Info("rootCA pair generated") + } else { + mpn.Log.Error(err, "failed to check rootCA secret existence") + return nil, err } caGroup.RootCA = rootCAPair From 653785b1825d4b0ee23f8c2500d1ca61214cd2f7 Mon Sep 17 00:00:00 2001 From: Maxim Muzafarov Date: Wed, 20 Jul 2022 18:37:36 +0100 Subject: [PATCH 13/13] Use switch case instead of ifElseChain --- .../controllers/provisioner/provisioner_native.go | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/virtualcluster/pkg/controller/controllers/provisioner/provisioner_native.go b/virtualcluster/pkg/controller/controllers/provisioner/provisioner_native.go index 3d95559d..9cc8295f 100644 --- a/virtualcluster/pkg/controller/controllers/provisioner/provisioner_native.go +++ b/virtualcluster/pkg/controller/controllers/provisioner/provisioner_native.go @@ -326,26 +326,23 @@ func (mpn *Native) createAndApplyPKI(ctx context.Context, vc *tenancyv1alpha1.Vi // reuse rootCa if it is present rootCaSecret := &corev1.Secret{} err := mpn.Get(ctx, client.ObjectKey{Name: secret.RootCASecretName, Namespace: vc.Status.ClusterNamespace}, rootCaSecret) - if err == nil { - // The secret is present and we can reuse it + switch { + case err == nil: rootCACrt, rootCAErr := pkiutil.DecodeCertPEM(rootCaSecret.Data[corev1.TLSCertKey]) if rootCAErr != nil { return nil, rootCAErr } - rootCAKey, rootCAErr := vcpki.DecodePrivateKeyPEM(rootCaSecret.Data[corev1.TLSPrivateKeyKey]) if rootCAErr != nil { return nil, rootCAErr } - rootCAPair = &vcpki.CrtKeyPair{ Crt: rootCACrt, Key: rootCAKey, } mpn.Log.Info("rootCA pair is reused from the secret") - } else if apierrors.IsNotFound(err) { + case apierrors.IsNotFound(err): mpn.Log.Info("rootCA secret is not found. Creating") - // create root ca, all components will share a single root ca rootCACrt, rootKey, rootCAErr := pkiutil.NewCertificateAuthority( &pkiutil.CertConfig{ Config: cert.Config{ @@ -356,18 +353,16 @@ func (mpn *Native) createAndApplyPKI(ctx context.Context, vc *tenancyv1alpha1.Vi if rootCAErr != nil { return nil, rootCAErr } - rootRsaKey, ok := rootKey.(*rsa.PrivateKey) if !ok { return nil, errors.New("fail to assert rsa PrivateKey") } - rootCAPair = &vcpki.CrtKeyPair{ Crt: rootCACrt, Key: rootRsaKey, } mpn.Log.Info("rootCA pair generated") - } else { + default: mpn.Log.Error(err, "failed to check rootCA secret existence") return nil, err }