Skip to content

Commit c633397

Browse files
authored
feat(customSageMakerEndpoint): Enable Async configuration for endpoint (#591)
* feat(customSageMakerEndpoint): Enable Async configuration for endpoint (#591)
1 parent 119b68f commit c633397

12 files changed

+505
-171
lines changed

apidocs/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737

3838
## Interfaces
3939

40+
- [AsyncInferenceConfig](interfaces/AsyncInferenceConfig.md)
4041
- [BaseClassProps](interfaces/BaseClassProps.md)
4142
- [ContainerImageConfig](interfaces/ContainerImageConfig.md)
4243
- [ContentGenerationAppSyncLambdaProps](interfaces/ContentGenerationAppSyncLambdaProps.md)

apidocs/classes/CustomSageMakerEndpoint.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,12 @@ enable disable xray tracing
9292
9393
***
9494

95+
### errorTopic?
96+
97+
> `readonly` `optional` **errorTopic**: `Topic`
98+
99+
***
100+
95101
### fieldLogLevel
96102

97103
> **fieldLogLevel**: `FieldLogLevel` = `appsync.FieldLogLevel.ALL`
@@ -212,6 +218,12 @@ Value will be appended to resources name.
212218

213219
***
214220

221+
### successTopic?
222+
223+
> `readonly` `optional` **successTopic**: `Topic`
224+
225+
***
226+
215227
### usageMetricMap
216228

217229
> `protected` `static` **usageMetricMap**: `Record`\<`string`, `number`\>
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
[**@cdklabs/generative-ai-cdk-constructs**](../README.md)**Docs**
2+
3+
***
4+
5+
[@cdklabs/generative-ai-cdk-constructs](../README.md) / AsyncInferenceConfig
6+
7+
# Interface: AsyncInferenceConfig
8+
9+
## Properties
10+
11+
### failurePath
12+
13+
> `readonly` **failurePath**: `string`
14+
15+
***
16+
17+
### maxConcurrentInvocationsPerInstance?
18+
19+
> `readonly` `optional` **maxConcurrentInvocationsPerInstance**: `number`
20+
21+
***
22+
23+
### outputPath
24+
25+
> `readonly` **outputPath**: `string`

apidocs/interfaces/CustomSageMakerEndpointProps.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,12 @@
88

99
## Properties
1010

11+
### asyncInference?
12+
13+
> `readonly` `optional` **asyncInference**: [`AsyncInferenceConfig`](AsyncInferenceConfig.md)
14+
15+
***
16+
1117
### container
1218

1319
> `readonly` **container**: [`ContainerImage`](../classes/ContainerImage.md)

docs/generative_ai_cdk_constructs.drawio

Lines changed: 42 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
<mxfile host="Electron" modified="2024-07-02T00:52:48.483Z" agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) draw.io/24.4.13 Chrome/124.0.6367.207 Electron/30.0.6 Safari/537.36" etag="7Bo-kMY3EzOB2eYsUAEn" version="24.4.13" type="device" pages="10">
1+
<mxfile host="Electron" agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) draw.io/24.7.5 Chrome/126.0.6478.183 Electron/31.3.0 Safari/537.36" version="24.7.5" pages="11">
22
<diagram id="yqzoU6PykweUqwamPqNK" name="aws-rag-appsync-stepfn-opensearch">
33
<mxGraphModel dx="2726" dy="658" grid="1" gridSize="10" guides="1" tooltips="1" connect="1" arrows="1" fold="1" page="1" pageScale="1" pageWidth="850" pageHeight="1100" math="0" shadow="0">
44
<root>
@@ -626,7 +626,7 @@
626626
</root>
627627
</mxGraphModel>
628628
</diagram>
629-
<diagram name="CustomSageMakerEndpoint" id="Ld184xT8tr4mMkqV-7Tk">
629+
<diagram name="CustomSageMakerEndpointRealTime" id="Ld184xT8tr4mMkqV-7Tk">
630630
<mxGraphModel dx="1026" dy="658" grid="1" gridSize="10" guides="1" tooltips="1" connect="1" arrows="1" fold="1" page="1" pageScale="1" pageWidth="850" pageHeight="1100" math="0" shadow="0">
631631
<root>
632632
<mxCell id="hdcXcgM0UPDYjAbK98_j-0" />
@@ -664,6 +664,46 @@
664664
</root>
665665
</mxGraphModel>
666666
</diagram>
667+
<diagram name="CustomSageMakerEndpointAsync" id="1XxpFNphD4dG7aegsWry">
668+
<mxGraphModel dx="1026" dy="658" grid="1" gridSize="10" guides="1" tooltips="1" connect="1" arrows="1" fold="1" page="1" pageScale="1" pageWidth="850" pageHeight="1100" math="0" shadow="0">
669+
<root>
670+
<mxCell id="0k5G1l83rrXwrz7Byu6q-0" />
671+
<mxCell id="0k5G1l83rrXwrz7Byu6q-1" parent="0k5G1l83rrXwrz7Byu6q-0" />
672+
<mxCell id="0k5G1l83rrXwrz7Byu6q-2" value="CustomSageMakerEndpoint (async)" style="rounded=0;whiteSpace=wrap;html=1;" vertex="1" parent="0k5G1l83rrXwrz7Byu6q-1">
673+
<mxGeometry x="90" y="530" width="610" height="60" as="geometry" />
674+
</mxCell>
675+
<mxCell id="0k5G1l83rrXwrz7Byu6q-3" value="AWS Cloud - User account" style="points=[[0,0],[0.25,0],[0.5,0],[0.75,0],[1,0],[1,0.25],[1,0.5],[1,0.75],[1,1],[0.75,1],[0.5,1],[0.25,1],[0,1],[0,0.75],[0,0.5],[0,0.25]];outlineConnect=0;gradientColor=none;html=1;whiteSpace=wrap;fontSize=12;fontStyle=0;container=1;pointerEvents=0;collapsible=0;recursiveResize=0;shape=mxgraph.aws4.group;grIcon=mxgraph.aws4.group_aws_cloud_alt;strokeColor=#232F3E;fillColor=none;verticalAlign=top;align=left;spacingLeft=30;fontColor=#232F3E;dashed=0;" vertex="1" parent="0k5G1l83rrXwrz7Byu6q-1">
676+
<mxGeometry x="90" y="620" width="610" height="550" as="geometry" />
677+
</mxCell>
678+
<mxCell id="0k5G1l83rrXwrz7Byu6q-4" value="Amazon Simple Storage Service&lt;br&gt;&lt;b&gt;Model artifacts +&lt;/b&gt;&lt;div&gt;&lt;b&gt;input data +&lt;/b&gt;&lt;/div&gt;&lt;div&gt;&lt;b&gt;Inference results&lt;/b&gt;&lt;/div&gt;" style="sketch=0;points=[[0,0,0],[0.25,0,0],[0.5,0,0],[0.75,0,0],[1,0,0],[0,1,0],[0.25,1,0],[0.5,1,0],[0.75,1,0],[1,1,0],[0,0.25,0],[0,0.5,0],[0,0.75,0],[1,0.25,0],[1,0.5,0],[1,0.75,0]];outlineConnect=0;fontColor=#232F3E;fillColor=#7AA116;strokeColor=#ffffff;dashed=0;verticalLabelPosition=bottom;verticalAlign=top;align=center;html=1;fontSize=12;fontStyle=0;aspect=fixed;shape=mxgraph.aws4.resourceIcon;resIcon=mxgraph.aws4.s3;" vertex="1" parent="0k5G1l83rrXwrz7Byu6q-3">
679+
<mxGeometry x="370" y="350" width="78" height="78" as="geometry" />
680+
</mxCell>
681+
<mxCell id="mQpaVntdGapTdL6flM7p-2" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;" edge="1" parent="0k5G1l83rrXwrz7Byu6q-3" source="0k5G1l83rrXwrz7Byu6q-5">
682+
<mxGeometry relative="1" as="geometry">
683+
<mxPoint x="148" y="220" as="targetPoint" />
684+
</mxGeometry>
685+
</mxCell>
686+
<mxCell id="0k5G1l83rrXwrz7Byu6q-5" value="Amazon Elastic&amp;nbsp;&lt;div&gt;Container Registry&lt;br&gt;&lt;/div&gt;" style="sketch=0;points=[[0,0,0],[0.25,0,0],[0.5,0,0],[0.75,0,0],[1,0,0],[0,1,0],[0.25,1,0],[0.5,1,0],[0.75,1,0],[1,1,0],[0,0.25,0],[0,0.5,0],[0,0.75,0],[1,0.25,0],[1,0.5,0],[1,0.75,0]];outlineConnect=0;fontColor=#232F3E;fillColor=#ED7100;strokeColor=#ffffff;dashed=0;verticalLabelPosition=bottom;verticalAlign=top;align=center;html=1;fontSize=12;fontStyle=0;aspect=fixed;shape=mxgraph.aws4.resourceIcon;resIcon=mxgraph.aws4.ecr;" vertex="1" parent="0k5G1l83rrXwrz7Byu6q-3">
687+
<mxGeometry x="110" y="350" width="78" height="78" as="geometry" />
688+
</mxCell>
689+
<mxCell id="mQpaVntdGapTdL6flM7p-0" value="Amazon Simple Notification Service&lt;div&gt;&lt;b&gt;Success and Error notifications&lt;/b&gt;&lt;/div&gt;" style="sketch=0;points=[[0,0,0],[0.25,0,0],[0.5,0,0],[0.75,0,0],[1,0,0],[0,1,0],[0.25,1,0],[0.5,1,0],[0.75,1,0],[1,1,0],[0,0.25,0],[0,0.5,0],[0,0.75,0],[1,0.25,0],[1,0.5,0],[1,0.75,0]];outlineConnect=0;fontColor=#232F3E;fillColor=#E7157B;strokeColor=#ffffff;dashed=0;verticalLabelPosition=bottom;verticalAlign=top;align=center;html=1;fontSize=12;fontStyle=0;aspect=fixed;shape=mxgraph.aws4.resourceIcon;resIcon=mxgraph.aws4.sns;" vertex="1" parent="0k5G1l83rrXwrz7Byu6q-3">
690+
<mxGeometry x="370" y="100" width="78" height="78" as="geometry" />
691+
</mxCell>
692+
<mxCell id="0k5G1l83rrXwrz7Byu6q-8" value="Amazon SageMaker&amp;nbsp;&lt;div&gt;asynchronous endpoint&lt;/div&gt;" style="sketch=0;points=[[0,0,0],[0.25,0,0],[0.5,0,0],[0.75,0,0],[1,0,0],[0,1,0],[0.25,1,0],[0.5,1,0],[0.75,1,0],[1,1,0],[0,0.25,0],[0,0.5,0],[0,0.75,0],[1,0.25,0],[1,0.5,0],[1,0.75,0]];outlineConnect=0;fontColor=#232F3E;fillColor=#01A88D;strokeColor=#ffffff;dashed=0;verticalLabelPosition=bottom;verticalAlign=top;align=center;html=1;fontSize=12;fontStyle=0;aspect=fixed;shape=mxgraph.aws4.resourceIcon;resIcon=mxgraph.aws4.sagemaker;" vertex="1" parent="0k5G1l83rrXwrz7Byu6q-3">
693+
<mxGeometry x="110" y="100" width="78" height="78" as="geometry" />
694+
</mxCell>
695+
<mxCell id="mQpaVntdGapTdL6flM7p-1" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0;entryY=0.5;entryDx=0;entryDy=0;entryPerimeter=0;" edge="1" parent="0k5G1l83rrXwrz7Byu6q-3" source="0k5G1l83rrXwrz7Byu6q-8" target="mQpaVntdGapTdL6flM7p-0">
696+
<mxGeometry relative="1" as="geometry" />
697+
</mxCell>
698+
<mxCell id="0k5G1l83rrXwrz7Byu6q-7" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;" edge="1" parent="0k5G1l83rrXwrz7Byu6q-1">
699+
<mxGeometry relative="1" as="geometry">
700+
<mxPoint x="460" y="1009" as="sourcePoint" />
701+
<mxPoint x="278" y="1009" as="targetPoint" />
702+
</mxGeometry>
703+
</mxCell>
704+
</root>
705+
</mxGraphModel>
706+
</diagram>
667707
<diagram id="zwjm-m0tJrHEoboqtmw_" name="aws-contentgen-appsync-lambda">
668708
<mxGraphModel dx="2726" dy="669" grid="1" gridSize="10" guides="1" tooltips="1" connect="1" arrows="1" fold="1" page="1" pageScale="1" pageWidth="850" pageHeight="1100" math="0" shadow="0">
669709
<root>

src/patterns/gen-ai/aws-model-deployment-sagemaker/README_custom_sagemaker_endpoint.md

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ env: {
4444
},
4545
```
4646

47-
Here is a minimal deployable pattern definition:
47+
Here is a minimal deployable pattern definition to deploy a real-time Amazon SageMaker endpoint:
4848

4949
TypeScript
5050
```typescript
@@ -95,6 +95,12 @@ CustomSageMakerEndpoint(
9595
)
9696
```
9797

98+
The construct also allows you to deploy an asynchronous SageMaker endpoint. Amazon SageMaker Asynchronous Inference is a capability in SageMaker that queues incoming requests and processes them asynchronously. This option is ideal for requests with large payload sizes (up to 1GB), long processing times (up to one hour), and near real-time latency requirements.
99+
100+
Asynchronous Inference enables you to save on costs by autoscaling the instance count to zero when there are no requests to process, so you only pay when your endpoint is processing requests. For more information about asynchronous inference, please refer to the [documentation](https://docs.aws.amazon.com/sagemaker/latest/dg/async-inference.html).
101+
102+
To configure the endpoint in asynchronous mode, you simply need to define the [AsyncInferenceConfig](#asyncinferenceconfig) in the construct properties. In this case, the construct will provision two Amazon Simple Notification Service topics which can be used to receive notifications about inference (failure and success).
103+
98104
## Initializer
99105

100106
```
@@ -125,6 +131,17 @@ Parameters
125131
| startupHealthCheckTimeoutInSeconds | Integer | ![Optional](https://img.shields.io/badge/optional-4169E1) | The timeout value, in seconds, for your inference container to pass health check by SageMaker Hosting |
126132
| modelDataDownloadTimeoutInSeconds | Integer | ![Optional](https://img.shields.io/badge/optional-4169E1) | The timeout value, in seconds, to download and extract the model that you want to host from Amazon S3 to the individual inference instance associated with this production variant. |
127133
| volumeSizeInGb | Integer | ![Optional](https://img.shields.io/badge/optional-4169E1) | The size, in GB, of the ML storage volume attached to individual inference instance associated with the production variant. Currently only Amazon EBS gp2 storage volumes are supported. |
134+
| asyncInference | AsyncInferenceConfig | ![Optional](https://img.shields.io/badge/optional-4169E1) | Specifies configuration for how an endpoint performs asynchronous inference. Refer to [AsyncInferenceConfig](#asyncinferenceconfig) for details. If not defined, the endpoint will be configured as real-time.|
135+
136+
### AsyncInferenceConfig
137+
138+
If defined, the SageMaker endpoint will perform asynchronous inference.
139+
140+
| **Name** | **Type** | **Required** |**Description** |
141+
|:-------------|:----------------|-----------------|-----------------|
142+
| failurePath | string | ![Required](https://img.shields.io/badge/required-ff0000) | The Amazon S3 location to upload failure inference responses to. This location needs to be in the same bucket containing the model artifacts. |
143+
| outputPath | string | ![Required](https://img.shields.io/badge/required-ff0000) | The Amazon S3 location to upload inference responses to. This location needs to be in the same bucket containing the model artifacts. |
144+
| maxConcurrentInvocationsPerInstance | number | ![Optional](https://img.shields.io/badge/optional-4169E1) | The maximum number of concurrent requests sent by the SageMaker client to the model container. |
128145

129146
## Pattern Properties
130147

@@ -141,6 +158,8 @@ Parameters
141158
|instanceType| SageMakerInstanceType | The ML compute instance type |
142159
|instanceCount| number | Number of instances to launch initially|
143160
|role| [iam.Role](https://docs.aws.amazon.com/cdk/api/v2/docs/aws-cdk-lib.aws_iam.Role.html) |The IAM role that SageMaker can assume to access model artifacts and docker image for deployment on ML compute instances or for batch transform jobs |
161+
|successTopic| [sns.Topic](https://docs.aws.amazon.com/cdk/api/v2/docs/aws-cdk-lib.aws_sns.Topic.html) | Amazon SNS topic to post a notification to when an inference completes successfully. If async configuration is not provided, this will not be defined.|
162+
|errorTopic| [sns.Topic](https://docs.aws.amazon.com/cdk/api/v2/docs/aws-cdk-lib.aws_sns.Topic.html) | Amazon SNS topic to post a notification to when an inference fails. If async configuration is not provided, this will not be defined.|
144163

145164
## Default properties
146165

@@ -149,12 +168,25 @@ Parameters
149168
- modelDataDownloadTimeoutInSeconds: 600 if not provided
150169
- instanceCount: 1 if not provided
151170

171+
If async configuration is enabled:
172+
- Enable server-side encryption for SNS Topics using AWS managed KMS Key
173+
- maxConcurrentInvocationsPerInstance: 10 if not provided
174+
152175
## Troubleshooting
153176

154177

155178

156179
## Architecture
157-
![Architecture Diagram](architecture_CustomSageMakerEndpoint.png)
180+
181+
Real-time endpoint architecture:
182+
183+
![Architecture Real-time Diagram](architecture_rt_CustomSageMakerEndpoint.png)
184+
185+
Asynchronous endpoint architecture:
186+
187+
To invoke the endpoint, you need to place the request payload in Amazon Simple Storage Service (S3). You also need to provide a pointer to this payload as a part of the InvokeEndpointAsync request. Upon invocation, SageMaker queues the request for processing and returns an identifier and output location as a response. Upon processing, SageMaker places the result in the Amazon S3 location.
188+
189+
![Architecture Async Diagram](architecture_async_CustomSageMakerEndpoint.png)
158190

159191
## Cost
160192

src/patterns/gen-ai/aws-model-deployment-sagemaker/custom-sagemaker-endpoint.ts

Lines changed: 64 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,14 +12,22 @@
1212
*/
1313
import * as cdk from 'aws-cdk-lib';
1414
import * as iam from 'aws-cdk-lib/aws-iam';
15+
import * as kms from 'aws-cdk-lib/aws-kms';
1516
import * as sagemaker from 'aws-cdk-lib/aws-sagemaker';
17+
import * as sns from 'aws-cdk-lib/aws-sns';
1618
import { Construct } from 'constructs';
1719
import { ContainerImage } from './container-image';
1820
import { SageMakerEndpointBase } from './sagemaker-endpoint-base';
1921
import { SageMakerInstanceType } from './sagemaker-instance-type';
2022
import { ConstructName } from '../../../common/base-class';
2123
import { BaseClassProps } from '../../../common/base-class/base-class';
2224

25+
/**
 * Asynchronous inference settings for the SageMaker endpoint.
 * When this configuration is supplied in the construct properties, the
 * endpoint config is given an async inference configuration built from it.
 */
export interface AsyncInferenceConfig {
26+
/** Amazon S3 location for failed inference responses; forwarded as `s3FailurePath`. */
readonly failurePath: string;
27+
/** Amazon S3 location for inference responses; forwarded as `s3OutputPath`. */
readonly outputPath: string;
28+
/** Maximum concurrent invocations per instance; the construct uses 10 when omitted. */
readonly maxConcurrentInvocationsPerInstance?: number;
29+
}
30+
2331
export interface CustomSageMakerEndpointProps {
2432
readonly modelId: string;
2533
readonly endpointName: string;
@@ -33,6 +41,7 @@ export interface CustomSageMakerEndpointProps {
3341
readonly volumeSizeInGb?: number | undefined;
3442
readonly vpcConfig?: sagemaker.CfnModel.VpcConfigProperty | undefined;
3543
readonly modelDataUrl: string;
44+
readonly asyncInference?: AsyncInferenceConfig | undefined;
3645

3746
}
3847

@@ -42,6 +51,8 @@ export class CustomSageMakerEndpoint extends SageMakerEndpointBase implements ia
4251
public readonly cfnModel: sagemaker.CfnModel;
4352
public readonly cfnEndpoint: sagemaker.CfnEndpoint;
4453
public readonly cfnEndpointConfig: sagemaker.CfnEndpointConfig;
54+
public readonly successTopic?: sns.Topic;
55+
public readonly errorTopic?: sns.Topic;
4556

4657
public readonly instanceType?: SageMakerInstanceType;
4758
public readonly instanceCount: number;
@@ -64,7 +75,6 @@ export class CustomSageMakerEndpoint extends SageMakerEndpointBase implements ia
6475
const lambdaFunctions: cdk.aws_lambda.DockerImageFunction[]=[];
6576
this.updateConstructUsageMetricCode( baseProps, scope, lambdaFunctions);
6677

67-
6878
this.instanceType = props.instanceType;
6979
this.modelId = props.modelId;
7080
this.instanceCount = Math.max(1, props.instanceCount ?? 1);
@@ -128,6 +138,33 @@ export class CustomSageMakerEndpoint extends SageMakerEndpointBase implements ia
128138
],
129139
});
130140

141+
if (props.asyncInference) {
142+
143+
// build sns topics for success and failure
144+
const successTopic = this.buildSnsTopic(`success-topic-${id}`, 'Success Topic');
145+
const failureTopic = this.buildSnsTopic(`failure-topic-${id}`, 'Failure Topic');
146+
147+
this.errorTopic = failureTopic;
148+
this.successTopic = successTopic;
149+
150+
// configure async inference
151+
const asyncInferenceConfigProperty: sagemaker.CfnEndpointConfig.AsyncInferenceConfigProperty = {
152+
outputConfig: {
153+
s3FailurePath: props.asyncInference.failurePath,
154+
s3OutputPath: props.asyncInference.outputPath,
155+
notificationConfig: {
156+
successTopic: successTopic.topicArn,
157+
errorTopic: failureTopic.topicArn,
158+
},
159+
},
160+
clientConfig: {
161+
maxConcurrentInvocationsPerInstance: props.asyncInference.maxConcurrentInvocationsPerInstance ?? 10,
162+
},
163+
};
164+
165+
endpointConfig.asyncInferenceConfig = asyncInferenceConfigProperty;
166+
}
167+
131168
endpointConfig.addDependency(model);
132169

133170
const endpoint = new sagemaker.CfnEndpoint(scope, `${modelIdStr}-endpoint-${id}`, {
@@ -164,4 +201,30 @@ export class CustomSageMakerEndpoint extends SageMakerEndpointBase implements ia
164201
resourceArns: [this.endpointArn],
165202
});
166203
}
204+
205+
/**
 * Creates an encrypted SNS topic used for async inference notifications.
 *
 * @param topicName - used both as the construct id and as the physical topic name
 * @param displayName - display name set on the topic
 * @returns the created topic, with publish granted to `this.role` and a
 * resource policy that denies publishing over non-TLS connections
 */
private buildSnsTopic(topicName: string, displayName: string): sns.Topic {
206+
// Look up the KMS key by the 'alias/aws/sns' alias for server-side encryption.
const masterKey = kms.Alias.fromAliasName(this, `aws-managed-key-${topicName}`, 'alias/aws/sns');
207+
208+
const topic = new sns.Topic(this, topicName, {
209+
topicName,
210+
displayName,
211+
masterKey: masterKey,
212+
});
213+
214+
// Allow the construct's role to publish notifications to this topic.
topic.grantPublish(this.role);
215+
216+
// Deny sns:Publish from any principal when the request is not sent over a secure transport.
topic.addToResourcePolicy(new iam.PolicyStatement({
217+
actions: ['sns:Publish'],
218+
effect: iam.Effect.DENY,
219+
resources: [topic.topicArn],
220+
conditions: {
221+
Bool: {
222+
'aws:SecureTransport': 'false',
223+
},
224+
},
225+
principals: [new iam.AnyPrincipal()],
226+
}));
227+
228+
return topic;
229+
}
167230
}

0 commit comments

Comments
 (0)