Skip to content

Commit 2e8fa25

Browse files
feat(nodeadm): enable CDI for 1.32+ (#2173)
1 parent 2b4adc5 commit 2e8fa25

File tree

14 files changed

+94
-37
lines changed

14 files changed

+94
-37
lines changed

nodeadm/cmd/nodeadm/init/init.go

+7
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,13 @@ func (c *initCmd) Run(log *zap.Logger, opts *cli.GlobalOptions) error {
149149
// Various initializations and verifications of the NodeConfig and
150150
// perform in-place updates when allowed by the user
151151
func enrichConfig(log *zap.Logger, cfg *api.NodeConfig) error {
152+
log.Info("Fetching kubelet version..")
153+
kubeletVersion, err := kubelet.GetKubeletVersion()
154+
if err != nil {
155+
return err
156+
}
157+
cfg.Status.KubeletVersion = kubeletVersion
158+
log.Info("Fetched kubelet version", zap.String("version", kubeletVersion))
152159
log.Info("Fetching instance details..")
153160
awsConfig, err := config.LoadDefaultConfig(context.TODO(),
154161
config.WithClientLogMode(aws.LogRetries),

nodeadm/internal/api/merge_test.go

+2
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,7 @@ discard_unpacked_layers = true
119119
120120
[plugins."io.containerd.grpc.v1.cri"]
121121
sandbox_image = "{{.SandboxImage}}"
122+
enable_cdi = true
122123
123124
[plugins."io.containerd.grpc.v1.cri".registry]
124125
config_path = "/etc/containerd/certs.d:/etc/docker/certs.d"
@@ -195,6 +196,7 @@ discard_unpacked_layers = false
195196
196197
[plugins."io.containerd.grpc.v1.cri"]
197198
sandbox_image = "{{.SandboxImage}}"
199+
enable_cdi = true
198200
199201
[plugins."io.containerd.grpc.v1.cri".registry]
200202
config_path = "/etc/containerd/certs.d:/etc/docker/certs.d"

nodeadm/internal/api/types.go

+5-4
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,9 @@ type NodeConfigSpec struct {
3636
}
3737

3838
type NodeConfigStatus struct {
39-
Instance InstanceDetails `json:"instance,omitempty"`
40-
Defaults DefaultOptions `json:"default,omitempty"`
39+
Instance InstanceDetails `json:"instance,omitempty"`
40+
Defaults DefaultOptions `json:"default,omitempty"`
41+
KubeletVersion string `json:"kubeletVersion,omitempty"`
4142
}
4243

4344
type InstanceDetails struct {
@@ -102,9 +103,9 @@ type LocalStorageOptions struct {
102103
type LocalStorageStrategy string
103104

104105
const (
105-
LocalStorageRAID0 LocalStorageStrategy = "RAID0"
106+
LocalStorageRAID0 LocalStorageStrategy = "RAID0"
106107
LocalStorageRAID10 LocalStorageStrategy = "RAID10"
107-
LocalStorageMount LocalStorageStrategy = "Mount"
108+
LocalStorageMount LocalStorageStrategy = "Mount"
108109
)
109110

110111
type Feature string

nodeadm/internal/containerd/config.go

+3
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99
"github.com/awslabs/amazon-eks-ami/nodeadm/internal/util"
1010
"github.com/pelletier/go-toml/v2"
1111
"go.uber.org/zap"
12+
"golang.org/x/mod/semver"
1213
)
1314

1415
const ContainerRuntimeEndpoint = "unix:///run/containerd/containerd.sock"
@@ -25,6 +26,7 @@ var (
2526
)
2627

2728
type containerdTemplateVars struct {
29+
EnableCDI bool
2830
SandboxImage string
2931
RuntimeName string
3032
RuntimeBinaryName string
@@ -65,6 +67,7 @@ func generateContainerdConfig(cfg *api.NodeConfig) ([]byte, error) {
6567
SandboxImage: cfg.Status.Defaults.SandboxImage,
6668
RuntimeBinaryName: instanceOptions.RuntimeBinaryName,
6769
RuntimeName: instanceOptions.RuntimeName,
70+
EnableCDI: semver.Compare(cfg.Status.KubeletVersion, "v1.32.0") >= 0,
6871
}
6972
var buf bytes.Buffer
7073
if err := containerdConfigTemplate.Execute(&buf, configVars); err != nil {

nodeadm/internal/containerd/config.template.toml

+1
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ discard_unpacked_layers = true
1111

1212
[plugins."io.containerd.grpc.v1.cri"]
1313
sandbox_image = "{{.SandboxImage}}"
14+
enable_cdi = {{.EnableCDI}}
1415

1516
[plugins."io.containerd.grpc.v1.cri".registry]
1617
config_path = "/etc/containerd/certs.d:/etc/docker/certs.d"

nodeadm/internal/kubelet/config.go

+12-23
Original file line numberDiff line numberDiff line change
@@ -36,13 +36,9 @@ const (
3636
)
3737

3838
func (k *kubelet) writeKubeletConfig(cfg *api.NodeConfig) error {
39-
kubeletVersion, err := GetKubeletVersion()
40-
if err != nil {
41-
return err
42-
}
4339
// tracking: https://github.com/kubernetes/enhancements/issues/3983
4440
// for enabling drop-in configuration
45-
if semver.Compare(kubeletVersion, "v1.29.0") < 0 {
41+
if semver.Compare(cfg.Status.KubeletVersion, "v1.29.0") < 0 {
4642
return k.writeKubeletConfigToFile(cfg)
4743
} else {
4844
return k.writeKubeletConfigToDir(cfg)
@@ -211,9 +207,9 @@ func (ksc *kubeletConfig) withNodeIp(cfg *api.NodeConfig, flags map[string]strin
211207
return nil
212208
}
213209

214-
func (ksc *kubeletConfig) withVersionToggles(kubeletVersion string, flags map[string]string) {
210+
func (ksc *kubeletConfig) withVersionToggles(cfg *api.NodeConfig, flags map[string]string) {
215211
// TODO: remove when 1.26 is EOL
216-
if semver.Compare(kubeletVersion, "v1.27.0") < 0 {
212+
if semver.Compare(cfg.Status.KubeletVersion, "v1.27.0") < 0 {
217213
// --container-runtime flag is gone in 1.27+
218214
flags["container-runtime"] = "remote"
219215
// --container-runtime-endpoint moved to kubelet config starting from 1.27
@@ -223,20 +219,20 @@ func (ksc *kubeletConfig) withVersionToggles(kubeletVersion string, flags map[st
223219

224220
// TODO: Remove this during 1.27 EOL
225221
// Enable Feature Gate for KubeletCredentialProviders in versions less than 1.28 since this feature flag was removed in 1.28.
226-
if semver.Compare(kubeletVersion, "v1.28.0") < 0 {
222+
if semver.Compare(cfg.Status.KubeletVersion, "v1.28.0") < 0 {
227223
ksc.FeatureGates["KubeletCredentialProviders"] = true
228224
}
229225

230226
// for K8s versions that support API Priority & Fairness, increase our API server QPS
231227
// in 1.27, the default is already increased to 50/100, so use the higher defaults
232-
if semver.Compare(kubeletVersion, "v1.22.0") >= 0 && semver.Compare(kubeletVersion, "v1.27.0") < 0 {
228+
if semver.Compare(cfg.Status.KubeletVersion, "v1.22.0") >= 0 && semver.Compare(cfg.Status.KubeletVersion, "v1.27.0") < 0 {
233229
ksc.KubeAPIQPS = ptr.Int(10)
234230
ksc.KubeAPIBurst = ptr.Int(20)
235231
}
236232
}
237233

238-
func (ksc *kubeletConfig) withCloudProvider(kubeletVersion string, cfg *api.NodeConfig, flags map[string]string) {
239-
if semver.Compare(kubeletVersion, "v1.26.0") >= 0 {
234+
func (ksc *kubeletConfig) withCloudProvider(cfg *api.NodeConfig, flags map[string]string) {
235+
if semver.Compare(cfg.Status.KubeletVersion, "v1.26.0") >= 0 {
240236
// ref: https://github.com/kubernetes/kubernetes/pull/121367
241237
flags["cloud-provider"] = "external"
242238
// provider ID needs to be specified when the cloud provider is external
@@ -280,24 +276,17 @@ func (ksc *kubeletConfig) withDefaultReservedResources(cfg *api.NodeConfig) {
280276
//
281277
// TODO: revisit once the minimum supported version catches up or the container
282278
// runtime is moved to containerd 2.0
283-
func (ksc *kubeletConfig) withPodInfraContainerImage(cfg *api.NodeConfig, kubeletVersion string, flags map[string]string) error {
279+
func (ksc *kubeletConfig) withPodInfraContainerImage(cfg *api.NodeConfig, flags map[string]string) error {
284280
// the flag is a noop on 1.29+, since the behavior was changed to use the
285281
// CRI image pinning behavior and no longer considers the flag value.
286282
// see: https://github.com/kubernetes/kubernetes/pull/118544
287-
if semver.Compare(kubeletVersion, "v1.29.0") < 0 {
283+
if semver.Compare(cfg.Status.KubeletVersion, "v1.29.0") < 0 {
288284
flags["pod-infra-container-image"] = cfg.Status.Defaults.SandboxImage
289285
}
290286
return nil
291287
}
292288

293289
func (k *kubelet) GenerateKubeletConfig(cfg *api.NodeConfig) (*kubeletConfig, error) {
294-
// Get the kubelet/kubernetes version to help conditionally enable features
295-
kubeletVersion, err := GetKubeletVersion()
296-
if err != nil {
297-
return nil, err
298-
}
299-
zap.L().Info("Detected kubelet version", zap.String("version", kubeletVersion))
300-
301290
kubeletConfig := defaultKubeletSubConfig()
302291

303292
if err := kubeletConfig.withFallbackClusterDns(&cfg.Spec.Cluster); err != nil {
@@ -309,12 +298,12 @@ func (k *kubelet) GenerateKubeletConfig(cfg *api.NodeConfig) (*kubeletConfig, er
309298
if err := kubeletConfig.withNodeIp(cfg, k.flags); err != nil {
310299
return nil, err
311300
}
312-
if err := kubeletConfig.withPodInfraContainerImage(cfg, kubeletVersion, k.flags); err != nil {
301+
if err := kubeletConfig.withPodInfraContainerImage(cfg, k.flags); err != nil {
313302
return nil, err
314303
}
315304

316-
kubeletConfig.withVersionToggles(kubeletVersion, k.flags)
317-
kubeletConfig.withCloudProvider(kubeletVersion, cfg, k.flags)
305+
kubeletConfig.withVersionToggles(cfg, k.flags)
306+
kubeletConfig.withCloudProvider(cfg, k.flags)
318307
kubeletConfig.withDefaultReservedResources(cfg)
319308

320309
return &kubeletConfig, nil

nodeadm/internal/kubelet/config_test.go

+20-4
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,12 @@ func TestKubeletCredentialProvidersFeatureFlag(t *testing.T) {
2121

2222
for _, test := range tests {
2323
kubetConfig := defaultKubeletSubConfig()
24-
kubetConfig.withVersionToggles(test.kubeletVersion, make(map[string]string))
24+
nodeConfig := api.NodeConfig{
25+
Status: api.NodeConfigStatus{
26+
KubeletVersion: test.kubeletVersion,
27+
},
28+
}
29+
kubetConfig.withVersionToggles(&nodeConfig, make(map[string]string))
2530
kubeletCredentialProviders, present := kubetConfig.FeatureGates["KubeletCredentialProviders"]
2631
if test.expectedValue == nil && present {
2732
t.Errorf("KubeletCredentialProviders shouldn't be set for versions %s", test.kubeletVersion)
@@ -44,7 +49,12 @@ func TestContainerRuntime(t *testing.T) {
4449
for _, test := range tests {
4550
kubeletAruments := make(map[string]string)
4651
kubetConfig := defaultKubeletSubConfig()
47-
kubetConfig.withVersionToggles(test.kubeletVersion, kubeletAruments)
52+
nodeConfig := api.NodeConfig{
53+
Status: api.NodeConfigStatus{
54+
KubeletVersion: test.kubeletVersion,
55+
},
56+
}
57+
kubetConfig.withVersionToggles(&nodeConfig, kubeletAruments)
4858
containerRuntime, present := kubeletAruments["container-runtime"]
4959
if test.expectedContainerRuntime == nil {
5060
if present {
@@ -78,7 +88,12 @@ func TestKubeAPILimits(t *testing.T) {
7888

7989
for _, test := range tests {
8090
kubetConfig := defaultKubeletSubConfig()
81-
kubetConfig.withVersionToggles(test.kubeletVersion, make(map[string]string))
91+
nodeConfig := api.NodeConfig{
92+
Status: api.NodeConfigStatus{
93+
KubeletVersion: test.kubeletVersion,
94+
},
95+
}
96+
kubetConfig.withVersionToggles(&nodeConfig, make(map[string]string))
8297
assert.Equal(t, test.expectedKubeAPIQS, kubetConfig.KubeAPIQPS)
8398
assert.Equal(t, test.expectedKubeAPIBurst, kubetConfig.KubeAPIBurst)
8499
}
@@ -108,7 +123,8 @@ func TestProviderID(t *testing.T) {
108123
for _, test := range tests {
109124
kubeletAruments := make(map[string]string)
110125
kubetConfig := defaultKubeletSubConfig()
111-
kubetConfig.withCloudProvider(test.kubeletVersion, &nodeConfig, kubeletAruments)
126+
nodeConfig.Status.KubeletVersion = test.kubeletVersion
127+
kubetConfig.withCloudProvider(&nodeConfig, kubeletAruments)
112128
assert.Equal(t, test.expectedCloudProvider, kubeletAruments["cloud-provider"])
113129
if kubeletAruments["cloud-provider"] == "external" {
114130
assert.Equal(t, *kubetConfig.ProviderID, providerId)

nodeadm/internal/kubelet/image-credential-provider.go

+1-5
Original file line numberDiff line numberDiff line change
@@ -64,11 +64,7 @@ func generateImageCredentialProviderConfig(cfg *api.NodeConfig, ecrCredentialPro
6464
templateVars := imageCredentialProviderTemplateVars{
6565
EcrProviderName: filepath.Base(ecrCredentialProviderBinPath),
6666
}
67-
kubeletVersion, err := GetKubeletVersion()
68-
if err != nil {
69-
return nil, err
70-
}
71-
if semver.Compare(kubeletVersion, "v1.27.0") < 0 {
67+
if semver.Compare(cfg.Status.KubeletVersion, "v1.27.0") < 0 {
7268
templateVars.ConfigApiVersion = "kubelet.config.k8s.io/v1alpha1"
7369
templateVars.ProviderApiVersion = "credentialprovider.kubelet.k8s.io/v1alpha1"
7470
} else {

nodeadm/internal/kubelet/version.go

+4
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ import (
55
"os"
66
"os/exec"
77
"regexp"
8+
9+
"go.uber.org/zap"
810
)
911

1012
func GetKubeletVersion() (string, error) {
@@ -20,10 +22,12 @@ const kubeletVersionFile = "/etc/eks/kubelet-version.txt"
2022

2123
func GetKubeletVersionRaw() ([]byte, error) {
2224
if _, err := os.Stat(kubeletVersionFile); errors.Is(err, os.ErrNotExist) {
25+
zap.L().Info("Reading kubelet version from executable")
2326
return exec.Command("kubelet", "--version").Output()
2427
} else if err != nil {
2528
return nil, err
2629
}
30+
zap.L().Info("Reading kubelet version from file", zap.String("path", kubeletVersionFile))
2731
return os.ReadFile(kubeletVersionFile)
2832
}
2933

nodeadm/test/e2e/cases/containerd-config/expected-containerd-config-pre-1.32.toml

+31
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
root = '/var/lib/containerd'
2+
state = '/run/containerd'
3+
version = 2
4+
5+
[grpc]
6+
address = '/run/foo/foo.sock'
7+
8+
[plugins]
9+
[plugins.'io.containerd.grpc.v1.cri']
10+
enable_cdi = false
11+
sandbox_image = 'localhost/kubernetes/pause'
12+
13+
[plugins.'io.containerd.grpc.v1.cri'.cni]
14+
bin_dir = '/opt/cni/bin'
15+
conf_dir = '/etc/cni/net.d'
16+
17+
[plugins.'io.containerd.grpc.v1.cri'.containerd]
18+
default_runtime_name = 'runc'
19+
discard_unpacked_layers = false
20+
21+
[plugins.'io.containerd.grpc.v1.cri'.containerd.runtimes]
22+
[plugins.'io.containerd.grpc.v1.cri'.containerd.runtimes.runc]
23+
base_runtime_spec = '/etc/containerd/base-runtime-spec.json'
24+
runtime_type = 'io.containerd.runc.v2'
25+
26+
[plugins.'io.containerd.grpc.v1.cri'.containerd.runtimes.runc.options]
27+
BinaryName = '/usr/sbin/runc'
28+
SystemdCgroup = true
29+
30+
[plugins.'io.containerd.grpc.v1.cri'.registry]
31+
config_path = '/etc/containerd/certs.d:/etc/docker/certs.d'

nodeadm/test/e2e/cases/containerd-config/expected-containerd-config.toml

+1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ address = '/run/foo/foo.sock'
77

88
[plugins]
99
[plugins.'io.containerd.grpc.v1.cri']
10+
enable_cdi = true
1011
sandbox_image = 'localhost/kubernetes/pause'
1112

1213
[plugins.'io.containerd.grpc.v1.cri'.cni]

nodeadm/test/e2e/cases/containerd-config/run.sh

+5-1
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,13 @@ set -o pipefail
77
source /helpers.sh
88

99
mock::aws
10-
mock::kubelet 1.27.0
1110
wait::dbus-ready
1211

12+
mock::kubelet 1.31.0
1313
nodeadm init --skip run --config-source file://config.yaml
14+
assert::files-equal /etc/containerd/config.toml expected-containerd-config-pre-1.32.toml
1415

16+
# enable_cdi defaults to true in 1.32+
17+
mock::kubelet 1.32.0
18+
nodeadm init --skip run --config-source file://config.yaml
1519
assert::files-equal /etc/containerd/config.toml expected-containerd-config.toml

nodeadm/test/e2e/cases/containerd-runtime-config-neuron/expected-containerd-config.toml

+1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ address = '/run/foo/foo.sock'
77

88
[plugins]
99
[plugins.'io.containerd.grpc.v1.cri']
10+
enable_cdi = false
1011
sandbox_image = 'localhost/kubernetes/pause'
1112

1213
[plugins.'io.containerd.grpc.v1.cri'.cni]

nodeadm/test/e2e/cases/containerd-runtime-config-nvidia/expected-containerd-config.toml

+1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ address = '/run/foo/foo.sock'
77

88
[plugins]
99
[plugins.'io.containerd.grpc.v1.cri']
10+
enable_cdi = false
1011
sandbox_image = 'localhost/kubernetes/pause'
1112

1213
[plugins.'io.containerd.grpc.v1.cri'.cni]

0 commit comments

Comments
 (0)