Skip to content

Commit b7be71c

Browse files
authored
fix(eks): can't deploy with Bottlerocket amiType (#17775)
As `Nodegroup` now supports different AMI types including Bottlerocket for both x86_64 and ARM_64, we cannot determine correct amiType simply from the `instanceTypes` property(#17641 ). However, when `instanceTypes` are provided, we still can check: 1. if instance types of different CPU architectures are mxied and throw an error 2. if the provided `amyType` compatible with the instanceTypes If user opt in Bottlerocket or any other AMI types other than AL2, users have to specify the `amiType` explicitly. If it's unspecified, we will use AL2 implicitly to avoid breaking changes, which is the default behavior previously. The only case `amiType` has to be undefined is that when custom AMI is defined in the launch template. As we can't check this case, users have to explicitly leave it undefined. We add a notice in the property doc string for this. Related to #12441 Fixes: #17641 ---- *By submitting this pull request, I confirm that my contribution is made under the terms of the Apache-2.0 license*
1 parent fd0f453 commit b7be71c

File tree

2 files changed

+350
-33
lines changed

2 files changed

+350
-33
lines changed

packages/@aws-cdk/aws-eks/lib/managed-nodegroup.ts

+74-31
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,9 @@
1-
import { InstanceType, ISecurityGroup, SubnetSelection } from '@aws-cdk/aws-ec2';
1+
import { InstanceType, ISecurityGroup, SubnetSelection, InstanceArchitecture } from '@aws-cdk/aws-ec2';
22
import { IRole, ManagedPolicy, Role, ServicePrincipal } from '@aws-cdk/aws-iam';
33
import { IResource, Resource, Annotations, withResolved } from '@aws-cdk/core';
44
import { Construct } from 'constructs';
55
import { Cluster, ICluster } from './cluster';
66
import { CfnNodegroup } from './eks.generated';
7-
import { INSTANCE_TYPES } from './instance-types';
87

98
/**
109
* NodeGroup interface
@@ -157,9 +156,10 @@ export interface NodegroupOptions {
157156
*/
158157
readonly subnets?: SubnetSelection;
159158
/**
160-
* The AMI type for your node group.
159+
* The AMI type for your node group. If you explicitly specify the launchTemplate with custom AMI, do not specify this property, or
160+
* the node group deployment will fail. In other cases, you will need to specify correct amiType for the nodegroup.
161161
*
162-
* @default - auto-determined from the instanceTypes property.
162+
* @default - auto-determined from the instanceTypes property when launchTemplateSpec property is not specified
163163
*/
164164
readonly amiType?: NodegroupAmiType;
165165
/**
@@ -357,15 +357,21 @@ export class Nodegroup extends Resource implements INodegroup {
357357
Annotations.of(this).addWarning('"instanceType" is deprecated and will be removed in the next major version. please use "instanceTypes" instead');
358358
}
359359
const instanceTypes = props.instanceTypes ?? (props.instanceType ? [props.instanceType] : undefined);
360-
let expectedAmiType = undefined;
360+
let possibleAmiTypes: NodegroupAmiType[] = [];
361361

362362
if (instanceTypes && instanceTypes.length > 0) {
363-
// if the user explicitly configured instance types, we can calculate the expected ami type.
364-
expectedAmiType = getAmiType(instanceTypes);
365-
366-
// if the user explicitly configured an ami type, make sure its the same as the expected one.
367-
if (props.amiType && props.amiType !== expectedAmiType) {
368-
throw new Error(`The specified AMI does not match the instance types architecture, either specify ${expectedAmiType} or dont specify any`);
363+
/**
364+
* if the user explicitly configured instance types, we can't caculate the expected ami type as we support
365+
* Amazon Linux 2 and Bottlerocket now. However we can check:
366+
*
367+
* 1. instance types of different CPU architectures are not mixed(e.g. X86 with ARM).
368+
* 2. user-specified amiType should be included in `possibleAmiTypes`.
369+
*/
370+
possibleAmiTypes = getPossibleAmiTypes(instanceTypes);
371+
372+
// if the user explicitly configured an ami type, make sure it's included in the possibleAmiTypes
373+
if (props.amiType && !possibleAmiTypes.includes(props.amiType)) {
374+
throw new Error(`The specified AMI does not match the instance types architecture, either specify one of ${possibleAmiTypes} or don't specify any`);
369375
}
370376
}
371377

@@ -387,11 +393,17 @@ export class Nodegroup extends Resource implements INodegroup {
387393
nodegroupName: props.nodegroupName,
388394
nodeRole: this.role.roleArn,
389395
subnets: this.cluster.vpc.selectSubnets(props.subnets).subnetIds,
390-
391-
// if a launch template is configured, we cannot apply a default since it
392-
// might exist in the launch template as well, causing a deployment failure.
393-
amiType: props.launchTemplateSpec !== undefined ? props.amiType : (props.amiType ?? expectedAmiType),
394-
396+
/**
397+
* Case 1: If launchTemplate is explicitly specified with custom AMI, we cannot specify amiType, or the node group deployment will fail.
398+
* As we don't know if the custom AMI is specified in the lauchTemplate, we just use props.amiType.
399+
*
400+
* Case 2: If launchTemplate is not specified, we try to determine amiType from the instanceTypes and it could be either AL2 or Bottlerocket.
401+
* To avoid breaking changes, we use possibleAmiTypes[0] if amiType is undefined and make sure AL2 is always the first element in possibleAmiTypes
402+
* as AL2 is previously the `expectedAmi` and this avoids breaking changes.
403+
*
404+
* That being said, users now either have to explicitly specify correct amiType or just leave it undefined.
405+
*/
406+
amiType: props.launchTemplateSpec ? props.amiType : (props.amiType ?? possibleAmiTypes[0]),
395407
capacityType: props.capacityType ? props.capacityType.valueOf() : undefined,
396408
diskSize: props.diskSize,
397409
forceUpdateEnabled: props.forceUpdate ?? true,
@@ -443,24 +455,55 @@ export class Nodegroup extends Resource implements INodegroup {
443455
}
444456
}
445457

446-
function getAmiTypeForInstanceType(instanceType: InstanceType) {
447-
return INSTANCE_TYPES.graviton2.includes(instanceType.toString().substring(0, 3)) ? NodegroupAmiType.AL2_ARM_64 :
448-
INSTANCE_TYPES.graviton.includes(instanceType.toString().substring(0, 2)) ? NodegroupAmiType.AL2_ARM_64 :
449-
INSTANCE_TYPES.gpu.includes(instanceType.toString().substring(0, 2)) ? NodegroupAmiType.AL2_X86_64_GPU :
450-
INSTANCE_TYPES.inferentia.includes(instanceType.toString().substring(0, 4)) ? NodegroupAmiType.AL2_X86_64_GPU :
451-
NodegroupAmiType.AL2_X86_64;
458+
/**
459+
* AMI types of different architectures. Make sure AL2 is always the first element, which will be the default
460+
* AmiType if amiType and launchTemplateSpec are both undefined.
461+
*/
462+
const arm64AmiTypes: NodegroupAmiType[] = [NodegroupAmiType.AL2_ARM_64, NodegroupAmiType.BOTTLEROCKET_ARM_64];
463+
const x8664AmiTypes: NodegroupAmiType[] = [NodegroupAmiType.AL2_X86_64, NodegroupAmiType.BOTTLEROCKET_X86_64];
464+
const gpuAmiTypes: NodegroupAmiType[] = [NodegroupAmiType.AL2_X86_64_GPU];
465+
466+
467+
/**
468+
* This function check if the instanceType is GPU instance.
469+
* @param instanceType The EC2 instance type
470+
*/
471+
function isGpuInstanceType(instanceType: InstanceType): boolean {
472+
// capture the family, generation, capabilities, and size portions of the instance type id
473+
const instanceTypeComponents = instanceType.toString().match(/^([a-z]+)(\d{1,2})([a-z]*)\.([a-z0-9]+)$/);
474+
if (instanceTypeComponents == null) {
475+
throw new Error('Malformed instance type identifier');
476+
}
477+
const family = instanceTypeComponents[1];
478+
return ['p', 'g', 'inf'].includes(family);
452479
}
453480

454-
// this function examines the CPU architecture of every instance type and determines
455-
// what ami type is compatible for all of them. it either throws or produces a single value because
456-
// instance types of different CPU architectures are not supported.
457-
function getAmiType(instanceTypes: InstanceType[]) {
458-
const amiTypes = new Set(instanceTypes.map(i => getAmiTypeForInstanceType(i)));
459-
if (amiTypes.size == 0) { // protective code, the current implementation will never result in this.
481+
type AmiArchitecture = InstanceArchitecture | 'GPU';
482+
/**
483+
* This function examines the CPU architecture of every instance type and determines
484+
* what AMI types are compatible for all of them. it either throws or produces an array of possible AMI types because
485+
* instance types of different CPU architectures are not supported.
486+
* @param instanceTypes The instance types
487+
* @returns NodegroupAmiType[]
488+
*/
489+
function getPossibleAmiTypes(instanceTypes: InstanceType[]): NodegroupAmiType[] {
490+
function typeToArch(instanceType: InstanceType): AmiArchitecture {
491+
return isGpuInstanceType(instanceType) ? 'GPU' : instanceType.architecture;
492+
}
493+
const archAmiMap = new Map<AmiArchitecture, NodegroupAmiType[]>([
494+
[InstanceArchitecture.ARM_64, arm64AmiTypes],
495+
[InstanceArchitecture.X86_64, x8664AmiTypes],
496+
['GPU', gpuAmiTypes],
497+
]);
498+
const architectures: Set<AmiArchitecture> = new Set(instanceTypes.map(typeToArch));
499+
500+
if (architectures.size === 0) { // protective code, the current implementation will never result in this.
460501
throw new Error(`Cannot determine any ami type comptaible with instance types: ${instanceTypes.map(i => i.toString).join(',')}`);
461502
}
462-
if (amiTypes.size > 1) {
463-
throw new Error('instanceTypes of different CPU architectures is not allowed');
503+
504+
if (architectures.size > 1) {
505+
throw new Error('instanceTypes of different architectures is not allowed');
464506
}
465-
return amiTypes.values().next().value;
507+
508+
return archAmiMap.get(Array.from(architectures)[0])!;
466509
}

0 commit comments

Comments
 (0)