Skip to content

Commit e120876

Browse files
erdemayyildizErdem Ayyildiz
and
Erdem Ayyildiz
authored
feat(sagemaker): add min-maxCapacity for autoscaling of custom-sagemaker-endpoint (#685)
* feat: add min-maxCapacity for autoscaling of custom-sagemaker-endpoint --------- Co-authored-by: Erdem Ayyildiz <[email protected]>
1 parent 606de61 commit e120876

File tree

5 files changed

+100
-13
lines changed

5 files changed

+100
-13
lines changed

apidocs/classes/CustomSageMakerEndpoint.md

+6
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,12 @@ Default log retention config for all constructs
200200
201201
***
202202

203+
### scalingPolicy
204+
205+
> `readonly` **scalingPolicy**: `StepScalingPolicy`
206+
207+
***
208+
203209
### stage
204210

205211
> **stage**: `string`

apidocs/interfaces/CustomSageMakerEndpointProps.md

+12
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,18 @@
4848
4949
***
5050

51+
### maxCapacity?
52+
53+
> `readonly` `optional` **maxCapacity**: `number`
54+
55+
***
56+
57+
### minCapacity?
58+
59+
> `readonly` `optional` **minCapacity**: `number`
60+
61+
***
62+
5163
### modelDataDownloadTimeoutInSeconds?
5264

5365
> `readonly` `optional` **modelDataDownloadTimeoutInSeconds**: `number`

src/patterns/gen-ai/aws-model-deployment-sagemaker/README_custom_sagemaker_endpoint.md

+9-1
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,9 @@ new CustomSageMakerEndpoint(this, 'customModel', {
6262
modelDataUrl: 's3://{Bucket}/{Key}/model.tar.gz',
6363
endpointName: 'testbgebase',
6464
instanceCount: 1,
65-
volumeSizeInGb: 100
65+
volumeSizeInGb: 100,
66+
minCapacity: 1,
67+
maxCapacity: 2,
6668
});
6769
```
6870

@@ -92,6 +94,8 @@ CustomSageMakerEndpoint(
9294
endpoint_name='testbgebase',
9395
instance_count=1,
9496
volume_size_in_gb=100,
97+
min_capacity=1,
98+
max_capacity=2,
9599
)
96100
```
97101

@@ -132,6 +136,8 @@ Parameters
132136
| modelDataDownloadTimeoutInSeconds | Integer | ![Optional](https://img.shields.io/badge/optional-4169E1) | The timeout value, in seconds, to download and extract the model that you want to host from Amazon S3 to the individual inference instance associated with this production variant. |
133137
| volumeSizeInGb | Integer | ![Optional](https://img.shields.io/badge/optional-4169E1) | The size, in GB, of the ML storage volume attached to individual inference instance associated with the production variant. Currently only Amazon EBS gp2 storage volumes are supported. |
134138
| asyncInference | AsyncInferenceConfig | ![Optional](https://img.shields.io/badge/optional-4169E1) | Specifies configuration for how an endpoint performs asynchronous inference. Refer to [AsyncInferenceConfig](#asyncinferenceconfig) for details. If not defined, the endpoint will be configured as real-time.|
139+
| minCapacity | Integer | ![Optional](https://img.shields.io/badge/optional-4169E1) | Specifies the minimum value that Application Auto Scaling can use to scale a target during a scaling activity. |
140+
| maxCapacity | Integer | ![Optional](https://img.shields.io/badge/optional-4169E1) | Specifies the maximum value that Application Auto Scaling can use to scale a target during a scaling activity. |
135141

136142
### AsyncInferenceConfig
137143

@@ -167,6 +173,8 @@ If defined, the SageMaker endpoint will perform asynchronous inference.
167173
- startupHealthCheckTimeoutInSeconds: 600 if not provided
168174
- modelDataDownloadTimeoutInSeconds: 600 if not provided
169175
- instanceCount: 1 if not provided
176+
- minCapacity: 1 if not provided
177+
- maxCapacity: 2 if not provided
170178

171179
If async configuration is enabled:
172180
- Enable server-side encryption for SNS Topics using AWS managed KMS Key

src/patterns/gen-ai/aws-model-deployment-sagemaker/custom-sagemaker-endpoint.ts

+71-12
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
* and limitations under the License.
1212
*/
1313
import * as cdk from 'aws-cdk-lib';
14+
import * as applicationautoscaling from 'aws-cdk-lib/aws-applicationautoscaling';
1415
import * as iam from 'aws-cdk-lib/aws-iam';
1516
import * as kms from 'aws-cdk-lib/aws-kms';
1617
import * as sagemaker from 'aws-cdk-lib/aws-sagemaker';
@@ -32,6 +33,8 @@ export interface CustomSageMakerEndpointProps {
3233
readonly modelId: string;
3334
readonly endpointName: string;
3435
readonly instanceType: SageMakerInstanceType;
36+
readonly minCapacity?:number;
37+
readonly maxCapacity?:number;
3538
readonly container: ContainerImage;
3639
readonly instanceCount?: number;
3740
readonly role?: iam.Role;
@@ -51,6 +54,7 @@ export class CustomSageMakerEndpoint extends SageMakerEndpointBase implements ia
5154
public readonly cfnModel: sagemaker.CfnModel;
5255
public readonly cfnEndpoint: sagemaker.CfnEndpoint;
5356
public readonly cfnEndpointConfig: sagemaker.CfnEndpointConfig;
57+
public readonly scalingPolicy: applicationautoscaling.StepScalingPolicy;
5458
public readonly successTopic?: sns.Topic;
5559
public readonly errorTopic?: sns.Topic;
5660

@@ -123,21 +127,24 @@ export class CustomSageMakerEndpoint extends SageMakerEndpointBase implements ia
123127
vpcConfig: props.vpcConfig,
124128
});
125129

130+
const productionVariant: sagemaker.CfnEndpointConfig.ProductionVariantProperty =
131+
{
132+
instanceType: this.instanceType.toString(),
133+
initialVariantWeight: 1,
134+
initialInstanceCount: this.instanceCount,
135+
variantName: 'AllTraffic',
136+
volumeSizeInGb: props.volumeSizeInGb,
137+
modelName: model.getAtt('ModelName').toString(),
138+
containerStartupHealthCheckTimeoutInSeconds: this.startupHealthCheckTimeoutInSeconds,
139+
modelDataDownloadTimeoutInSeconds: this.modelDataDownloadTimeoutInSeconds,
140+
};
141+
142+
126143
const endpointConfig = new sagemaker.CfnEndpointConfig(scope, `EndpointConfig-${id}`, {
127-
productionVariants: [
128-
{
129-
instanceType: this.instanceType.toString(),
130-
initialVariantWeight: 1,
131-
initialInstanceCount: this.instanceCount,
132-
variantName: 'AllTraffic',
133-
volumeSizeInGb: props.volumeSizeInGb,
134-
modelName: model.getAtt('ModelName').toString(),
135-
containerStartupHealthCheckTimeoutInSeconds: this.startupHealthCheckTimeoutInSeconds,
136-
modelDataDownloadTimeoutInSeconds: this.modelDataDownloadTimeoutInSeconds,
137-
},
138-
],
144+
productionVariants: [productionVariant],
139145
});
140146

147+
141148
if (props.asyncInference) {
142149

143150
// build sns topics for success and failure
@@ -180,10 +187,12 @@ export class CustomSageMakerEndpoint extends SageMakerEndpointBase implements ia
180187

181188
endpoint.addDependency(endpointConfig);
182189

190+
183191
this.cfnModel = model;
184192
this.cfnEndpoint = endpoint;
185193
this.cfnEndpointConfig = endpointConfig;
186194
this.endpointArn = endpoint.ref;
195+
this.scalingPolicy = this.buildScalingPolicy(endpoint, productionVariant, props );
187196
}
188197

189198
public addToRolePolicy(statement: iam.PolicyStatement) {
@@ -202,6 +211,56 @@ export class CustomSageMakerEndpoint extends SageMakerEndpointBase implements ia
202211
});
203212
}
204213

214+
private buildScalingPolicy(
215+
endpoint: sagemaker.CfnEndpoint,
216+
productionVariants: sagemaker.CfnEndpointConfig.ProductionVariantProperty,
217+
props: CustomSageMakerEndpointProps): applicationautoscaling.StepScalingPolicy {
218+
const resourceId = `endpoint/${endpoint.attrEndpointName}/variant/${productionVariants.variantName}`;
219+
220+
const scalableTarget = new applicationautoscaling.ScalableTarget(
221+
this,
222+
'ScalableTarget',
223+
{
224+
serviceNamespace: applicationautoscaling.ServiceNamespace.SAGEMAKER,
225+
resourceId: resourceId,
226+
scalableDimension: 'sagemaker:variant:DesiredInstanceCount',
227+
minCapacity: props.minCapacity ?? 1,
228+
maxCapacity: props.maxCapacity ?? 2,
229+
},
230+
);
231+
scalableTarget.node.addDependency(endpoint);
232+
233+
const approximateBacklogMetric = new cdk.aws_cloudwatch.Metric({
234+
namespace: 'AWS/SageMaker',
235+
metricName: 'ApproximateBacklogSizePerInstance',
236+
dimensionsMap: {
237+
Endpoint: endpoint.attrEndpointName,
238+
Variant: productionVariants.variantName,
239+
},
240+
statistic: 'Average',
241+
period: cdk.Duration.minutes(5),
242+
});
243+
244+
const scalingPolicy = new applicationautoscaling.StepScalingPolicy(
245+
this,
246+
'ScalingPolicy',
247+
{
248+
scalingTarget: scalableTarget,
249+
adjustmentType: applicationautoscaling.AdjustmentType.CHANGE_IN_CAPACITY,
250+
metric: approximateBacklogMetric,
251+
scalingSteps: [
252+
{ upper: 0, change: -1, lower: 0 },
253+
{ change: 1, lower: 0.5 },
254+
],
255+
cooldown: cdk.Duration.minutes(5),
256+
datapointsToAlarm: 1,
257+
evaluationPeriods: 1,
258+
},
259+
);
260+
261+
return scalingPolicy;
262+
}
263+
205264
private buildSnsTopic(topicName: string, displayName: string): sns.Topic {
206265
const masterKey = kms.Alias.fromAliasName(this, `aws-managed-key-${topicName}`, 'alias/aws/sns');
207266

test/patterns/gen-ai/aws-model-deployment-sagemaker/aws-custom-sagemaker-endpoint.test.ts

+2
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,8 @@ describe('CustomSageMakerEndpoint construct no async', () => {
5050
endpointName: 'testbgebase',
5151
instanceCount: 1,
5252
volumeSizeInGb: 100,
53+
minCapacity: 1,
54+
maxCapacity: 2,
5355
});
5456
CstTestTemplate = Template.fromStack(CstTestStack);
5557

0 commit comments

Comments
 (0)