Skip to content

Commit 969ba95

Browse files
committed
feat: Support HelmAddon strategy to deploy NFD
1 parent 7ace458 commit 969ba95

File tree

16 files changed

+453
-101
lines changed

16 files changed

+453
-101
lines changed

api/v1alpha1/addon_types.go

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,12 +80,26 @@ func (CNI) VariableSchema() clusterv1.VariableSchema {
8080
}
8181

8282
// NFD tells us to enable or disable the node feature discovery addon.
83-
type NFD struct{}
83+
type NFD struct {
84+
// +optional
85+
Strategy AddonStrategy `json:"strategy,omitempty"`
86+
}
8487

8588
func (NFD) VariableSchema() clusterv1.VariableSchema {
8689
return clusterv1.VariableSchema{
8790
OpenAPIV3Schema: clusterv1.JSONSchemaProps{
8891
Type: "object",
92+
Properties: map[string]clusterv1.JSONSchemaProps{
93+
"strategy": {
94+
Description: "Addon strategy used to deploy the CNI provider to the workload cluster",
95+
Type: "string",
96+
Enum: variables.MustMarshalValuesToEnumJSON(
97+
AddonStrategyClusterResourceSet,
98+
AddonStrategyHelmAddon,
99+
),
100+
},
101+
},
102+
Required: []string{"strategy"},
89103
},
90104
}
91105
}

api/v1alpha1/constants.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,10 @@
44
package v1alpha1
55

66
const (
7-
// CNIVariableName is the external patch variable name.
7+
// CNIVariableName is the CNI external patch variable name.
88
CNIVariableName = "cni"
9+
// NFDVariableName is the NFD external patch variable name.
10+
NFDVariableName = "nfd"
911
// AWSVariableName is the AWS config patch variable name.
1012
AWSVariableName = "aws"
1113
)

charts/capi-runtime-extensions/README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,9 @@ A Helm chart for capi-runtime-extensions
4646
| hooks.cni.cilium.crsStrategy.defaultCiliumConfigMap.name | string | `"cilium"` | |
4747
| hooks.cni.cilium.helmAddonStrategy.defaultValueTemplateConfigMap.create | bool | `true` | |
4848
| hooks.cni.cilium.helmAddonStrategy.defaultValueTemplateConfigMap.name | string | `"default-cilium-cni-helm-values-template"` | |
49+
| hooks.nfd.crsStrategy.defaultInstallationConfigMap.name | string | `"node-feature-discovery"` | |
50+
| hooks.nfd.helmAddonStrategy.defaultValueTemplateConfigMap.create | bool | `true` | |
51+
| hooks.nfd.helmAddonStrategy.defaultValueTemplateConfigMap.name | string | `"default-nfd-helm-values-template"` | |
4952
| image.pullPolicy | string | `"IfNotPresent"` | |
5053
| image.repository | string | `"ghcr.io/d2iq-labs/capi-runtime-extensions"` | |
5154
| image.tag | string | `""` | |

charts/capi-runtime-extensions/templates/deployment.yaml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,9 @@ spec:
3434
- --cni.cilium.crs.defaults-namespace={{ .Release.Namespace }}
3535
- --cni.cilium.helm-addon.defaults-namespace={{ .Release.Namespace }}
3636
- --cni.cilium.helm-addon.default-values-template-configmap-name={{ .Values.hooks.cni.cilium.helmAddonStrategy.defaultValueTemplateConfigMap.name }}
37-
- --nfd.defaults-namespace={{ .Release.Namespace }}
37+
- --nfd.crs.defaults-namespace={{ .Release.Namespace }}
38+
- --nfd.helm-addon.defaults-namespace={{ .Release.Namespace }}
39+
- --nfd.helm-addon.default-values-template-configmap-name={{ .Values.hooks.nfd.helmAddonStrategy.defaultValueTemplateConfigMap.name }}
3840
{{- range $key, $value := .Values.extraArgs }}
3941
- --{{ $key }}={{ $value }}
4042
{{- end }}
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# Copyright 2024 D2iQ, Inc. All rights reserved.
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
{{- if .Values.hooks.nfd.helmAddonStrategy.defaultValueTemplateConfigMap.create }}
5+
apiVersion: v1
6+
kind: ConfigMap
7+
metadata:
8+
name: '{{ .Values.hooks.nfd.helmAddonStrategy.defaultValueTemplateConfigMap.name }}'
9+
data:
10+
values.yaml: |-
11+
master:
12+
extraLabelNs:
13+
- nvidia.com
14+
- beta.amd.com
15+
- amd.com
16+
17+
worker: ### <NFD-WORKER-CONF-START-DO-NOT-REMOVE>
18+
config:
19+
sources:
20+
pci:
21+
deviceLabelFields:
22+
- "class"
23+
- "vendor"
24+
tolerations:
25+
- effect: NoSchedule
26+
key: node-role.kubernetes.io/master
27+
- effect: NoSchedule
28+
key: node-role.kubernetes.io/control-plane
29+
### <NFD-WORKER-CONF-END-DO-NOT-REMOVE>
30+
{{- end -}}

charts/capi-runtime-extensions/templates/nfd/manifests/node-feature-discovery-configmap.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -924,4 +924,4 @@ data:
924924
kind: ConfigMap
925925
metadata:
926926
creationTimestamp: null
927-
name: node-feature-discovery
927+
name: '{{ .Values.hooks.nfd.crsStrategy.defaultInstallationConfigMap.name }}'

charts/capi-runtime-extensions/values.yaml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,14 @@ hooks:
3535
defaultValueTemplateConfigMap:
3636
create: true
3737
name: default-cilium-cni-helm-values-template
38+
nfd:
39+
crsStrategy:
40+
defaultInstallationConfigMap:
41+
name: node-feature-discovery
42+
helmAddonStrategy:
43+
defaultValueTemplateConfigMap:
44+
create: true
45+
name: default-nfd-helm-values-template
3846

3947
deployDefaultClusterClasses: true
4048

docs/content/addons/nfd.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,5 +25,6 @@ spec:
2525
- name: clusterConfig
2626
value:
2727
addons:
28-
nfd: {}
28+
nfd:
29+
strategy: HelmAddon
2930
```

hack/addons/update-node-feature-discovery-manifests.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ envsubst -no-unset <"${KUSTOMIZE_BASE_DIR}/kustomization.yaml.tmpl" >"${ASSETS_D
2424
cp "${KUSTOMIZE_BASE_DIR}"/*.yaml "${ASSETS_DIR}"
2525
kustomize build --enable-helm "${ASSETS_DIR}" >"${ASSETS_DIR}/${FILE_NAME}"
2626

27-
kubectl create configmap node-feature-discovery --dry-run=client --output yaml \
27+
kubectl create configmap "{{ .Values.hooks.nfd.crsStrategy.defaultInstallationConfigMap.name }}" --dry-run=client --output yaml \
2828
--from-file "${ASSETS_DIR}/${FILE_NAME}" \
2929
>"${ASSETS_DIR}/node-feature-discovery-configmap.yaml"
3030

make/addons.mk

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
export CALICO_VERSION := $(shell goprintconst -file pkg/handlers/generic/lifecycle/cni/calico/strategy_helmaddon.go -name defaultCalicoHelmChartVersion)
55
export CILIUM_VERSION := $(shell goprintconst -file pkg/handlers/generic/lifecycle/cni/cilium/strategy_helmaddon.go -name defaultCiliumHelmChartVersion)
6-
export NODE_FEATURE_DISCOVERY_VERSION := 0.14.1
6+
export NODE_FEATURE_DISCOVERY_VERSION := $(shell goprintconst -file pkg/handlers/generic/lifecycle/nfd/strategy_helmaddon.go -name defaultHelmChartVersion)
77
export AWS_CSI_SNAPSHOT_CONTROLLER_VERSION := v6.3.0
88
export AWS_EBS_CSI_VERSION := v1.25.0
99
# a map of AWS CPI versions

pkg/handlers/generic/lifecycle/handlers.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ import (
2222
type Handlers struct {
2323
calicoCNIConfig *calico.CNIConfig
2424
ciliumCNIConfig *cilium.CNIConfig
25-
nfdConfig *nfd.NFDConfig
25+
nfdConfig *nfd.Config
2626
ebsConfig *awsebs.AWSEBSConfig
2727
awsCPIConfig *awscpi.AWSCPIConfig
2828
}
@@ -31,7 +31,7 @@ func New() *Handlers {
3131
return &Handlers{
3232
calicoCNIConfig: &calico.CNIConfig{},
3333
ciliumCNIConfig: &cilium.CNIConfig{},
34-
nfdConfig: &nfd.NFDConfig{},
34+
nfdConfig: &nfd.Config{},
3535
ebsConfig: &awsebs.AWSEBSConfig{},
3636
awsCPIConfig: &awscpi.AWSCPIConfig{},
3737
}

pkg/handlers/generic/lifecycle/nfd/handler.go

Lines changed: 50 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -7,62 +7,55 @@ import (
77
"context"
88
"fmt"
99

10+
"github.com/go-logr/logr"
1011
"github.com/spf13/pflag"
11-
corev1 "k8s.io/api/core/v1"
12-
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
13-
capiv1 "sigs.k8s.io/cluster-api/api/v1beta1"
1412
runtimehooksv1 "sigs.k8s.io/cluster-api/exp/runtime/hooks/api/v1alpha1"
1513
ctrl "sigs.k8s.io/controller-runtime"
1614
ctrlclient "sigs.k8s.io/controller-runtime/pkg/client"
1715

1816
"github.com/d2iq-labs/capi-runtime-extensions/api/v1alpha1"
17+
commonhandlers "github.com/d2iq-labs/capi-runtime-extensions/common/pkg/capi/clustertopology/handlers"
18+
"github.com/d2iq-labs/capi-runtime-extensions/common/pkg/capi/clustertopology/handlers/lifecycle"
1919
"github.com/d2iq-labs/capi-runtime-extensions/common/pkg/capi/clustertopology/variables"
20-
"github.com/d2iq-labs/capi-runtime-extensions/common/pkg/k8s/client"
2120
"github.com/d2iq-labs/capi-runtime-extensions/pkg/handlers/generic/clusterconfig"
22-
"github.com/d2iq-labs/capi-runtime-extensions/pkg/handlers/generic/lifecycle/utils"
2321
)
2422

25-
type NFDConfig struct {
26-
defaultsNamespace string
27-
defaultNFDConfigMap string
23+
type addonStrategy interface {
24+
apply(context.Context, *runtimehooksv1.AfterControlPlaneInitializedRequest, logr.Logger) error
25+
}
26+
27+
type Config struct {
28+
crsConfig crsConfig
29+
helmAddonConfig helmAddonConfig
30+
}
31+
32+
// AddFlags delegates flag registration to both strategy configs, giving each
// its own namespace under prefix: ".crs" for the ClusterResourceSet strategy
// and ".helm-addon" for the HelmAddon strategy.
func (c *Config) AddFlags(prefix string, flags *pflag.FlagSet) {
	crsPrefix := prefix + ".crs"
	helmAddonPrefix := prefix + ".helm-addon"

	c.crsConfig.AddFlags(crsPrefix, flags)
	c.helmAddonConfig.AddFlags(helmAddonPrefix, flags)
}
2936

3037
type DefaultNFD struct {
3138
client ctrlclient.Client
32-
config *NFDConfig
39+
config *Config
3340

3441
variableName string // points to the global config variable
3542
variablePath []string // path of this variable on the global config variable
3643
}
3744

38-
func (n *NFDConfig) AddFlags(prefix string, flags *pflag.FlagSet) {
39-
flags.StringVar(
40-
&n.defaultsNamespace,
41-
prefix+".defaults-namespace",
42-
corev1.NamespaceDefault,
43-
"namespace location of ConfigMap used to deploy Node Feature Discovery (NFD).",
44-
)
45-
flags.StringVar(
46-
&n.defaultNFDConfigMap,
47-
prefix+".default-nfd-configmap-name",
48-
"node-feature-discovery",
49-
"name of the ConfigMap used to deploy Node Feature Discovery (NFD)",
50-
)
51-
}
52-
53-
const (
54-
variableName = "nfd"
45+
var (
46+
_ commonhandlers.Named = &DefaultNFD{}
47+
_ lifecycle.AfterControlPlaneInitialized = &DefaultNFD{}
5548
)
5649

5750
func New(
5851
c ctrlclient.Client,
59-
cfg *NFDConfig,
52+
cfg *Config,
6053
) *DefaultNFD {
6154
return &DefaultNFD{
6255
client: c,
6356
config: cfg,
6457
variableName: clusterconfig.MetaVariableName,
65-
variablePath: []string{"addons", variableName},
58+
variablePath: []string{"addons", v1alpha1.NFDVariableName},
6659
}
6760
}
6861

@@ -81,77 +74,51 @@ func (n *DefaultNFD) AfterControlPlaneInitialized(
8174
"cluster",
8275
clusterKey,
8376
)
77+
8478
varMap := variables.ClusterVariablesToVariablesMap(req.Cluster.Spec.Topology.Variables)
8579

86-
_, found, err := variables.Get[v1alpha1.NFD](varMap, n.variableName, n.variablePath...)
80+
cniVar, found, err := variables.Get[v1alpha1.NFD](varMap, n.variableName, n.variablePath...)
8781
if err != nil {
82+
log.Error(
83+
err,
84+
"failed to read NFD variable from cluster definition",
85+
)
8886
resp.SetStatus(runtimehooksv1.ResponseStatusFailure)
89-
log.Error(err, "failed to get NFD variable")
87+
resp.SetMessage(
88+
fmt.Sprintf("failed to read NFD variable from cluster definition: %v",
89+
err,
90+
),
91+
)
9092
return
9193
}
92-
// If the variable isn't there or disabled we can ignore it.
9394
if !found {
94-
log.V(4).Info(
95-
"Skipping NFD handler. Not specified in cluster config.",
96-
)
95+
log.Info("Skipping NFD handler, cluster does not specify request NFDaddon deployment")
9796
return
9897
}
9998

100-
cm, err := n.ensureNFDConfigMapForCluster(ctx, &req.Cluster)
101-
if err != nil {
99+
var strategy addonStrategy
100+
switch cniVar.Strategy {
101+
case v1alpha1.AddonStrategyClusterResourceSet:
102+
strategy = crsStrategy{
103+
config: n.config.crsConfig,
104+
client: n.client,
105+
}
106+
case v1alpha1.AddonStrategyHelmAddon:
107+
strategy = helmAddonStrategy{
108+
config: n.config.helmAddonConfig,
109+
client: n.client,
110+
}
111+
default:
102112
resp.SetStatus(runtimehooksv1.ResponseStatusFailure)
103-
log.Error(err, "failed to apply NFD ConfigMap for cluster")
113+
resp.SetMessage(fmt.Sprintf("unknown NFD addon deployment strategy %q", cniVar.Strategy))
104114
return
105115
}
106-
err = utils.EnsureCRSForClusterFromConfigMaps(
107-
ctx,
108-
cm.Name+"-"+req.Cluster.Name,
109-
n.client,
110-
&req.Cluster,
111-
cm,
112-
)
113-
if err != nil {
116+
117+
if err := strategy.apply(ctx, req, log); err != nil {
114118
resp.SetStatus(runtimehooksv1.ResponseStatusFailure)
115-
log.Error(err, "failed to apply NFD ClusterResourceSet for cluster")
119+
resp.SetMessage(err.Error())
116120
return
117121
}
118122

119123
resp.SetStatus(runtimehooksv1.ResponseStatusSuccess)
120124
}
121-
122-
// ensureNFDConfigMapForCluster is a private function that creates a configMap for the cluster.
123-
func (n *DefaultNFD) ensureNFDConfigMapForCluster(
124-
ctx context.Context,
125-
cluster *capiv1.Cluster,
126-
) (*corev1.ConfigMap, error) {
127-
key := ctrlclient.ObjectKey{
128-
Namespace: n.config.defaultsNamespace,
129-
Name: n.config.defaultNFDConfigMap,
130-
}
131-
cm := &corev1.ConfigMap{}
132-
err := n.client.Get(ctx, key, cm)
133-
if err != nil {
134-
return nil, fmt.Errorf(
135-
"failed to fetch the configmap specified by %v: %w",
136-
n.config,
137-
err,
138-
)
139-
}
140-
// Base configmap is there now we create one in the cluster namespace if needed.
141-
cmForCluster := &corev1.ConfigMap{
142-
TypeMeta: metav1.TypeMeta{
143-
APIVersion: corev1.SchemeGroupVersion.String(),
144-
Kind: "ConfigMap",
145-
},
146-
ObjectMeta: metav1.ObjectMeta{
147-
Namespace: cluster.Namespace,
148-
Name: n.config.defaultNFDConfigMap,
149-
},
150-
Data: cm.Data,
151-
}
152-
err = client.ServerSideApply(ctx, n.client, cmForCluster)
153-
if err != nil {
154-
return nil, fmt.Errorf("failed to apply NFD ConfigMap for cluster: %w", err)
155-
}
156-
return cmForCluster, nil
157-
}

0 commit comments

Comments
 (0)