@@ -195,3 +195,70 @@ const endpointConfig = new sagemaker.EndpointConfig(this, 'EndpointConfig', {
195
195
]
196
196
});
197
197
```
198
+
199
+ ### Endpoint
200
+
201
+ When you create an endpoint from an `EndpointConfig`, Amazon SageMaker launches the ML compute
202
+ instances and deploys the model or models as specified in the configuration. To get inferences from
203
+ the model, client applications send requests to the Amazon SageMaker Runtime HTTPS endpoint. For
204
+ more information about the API, see the
205
+ [InvokeEndpoint](https://docs.aws.amazon.com/sagemaker/latest/dg/API_runtime_InvokeEndpoint.html)
206
+ API. Defining an endpoint requires at minimum the associated endpoint configuration:
207
+
208
+ ``` typescript
209
+ import * as sagemaker from ' @aws-cdk/aws-sagemaker' ;
210
+
211
+ declare const endpointConfig: sagemaker .EndpointConfig ;
212
+
213
+ const endpoint = new sagemaker .Endpoint (this , ' Endpoint' , { endpointConfig });
214
+ ```
215
+
216
+ ### AutoScaling
217
+
218
+ To enable autoscaling on the production variant, use the `autoScaleInstanceCount` method:
219
+
220
+ ``` typescript
221
+ import * as sagemaker from ' @aws-cdk/aws-sagemaker' ;
222
+
223
+ declare const model: sagemaker .Model ;
224
+
225
+ const variantName = ' my-variant' ;
226
+ const endpointConfig = new sagemaker .EndpointConfig (this , ' EndpointConfig' , {
227
+ instanceProductionVariants: [
228
+ {
229
+ model: model ,
230
+ variantName: variantName ,
231
+ },
232
+ ]
233
+ });
234
+
235
+ const endpoint = new sagemaker .Endpoint (this , ' Endpoint' , { endpointConfig });
236
+ const productionVariant = endpoint .findInstanceProductionVariant (variantName );
237
+ const instanceCount = productionVariant .autoScaleInstanceCount ({
238
+ maxCapacity: 3
239
+ });
240
+ instanceCount .scaleOnInvocations (' LimitRPS' , {
241
+ maxRequestsPerSecond: 30 ,
242
+ });
243
+ ```
244
+
245
+ For load testing guidance on determining the maximum requests per second per instance, please see
246
+ this [documentation](https://docs.aws.amazon.com/sagemaker/latest/dg/endpoint-scaling-loadtest.html).
247
+
248
+ ### Metrics
249
+
250
+ To monitor CloudWatch metrics for a production variant, use one or more of the metric convenience
251
+ methods:
252
+
253
+ ``` typescript
254
+ import * as sagemaker from ' @aws-cdk/aws-sagemaker' ;
255
+
256
+ declare const endpointConfig: sagemaker .EndpointConfig ;
257
+
258
+ const endpoint = new sagemaker .Endpoint (this , ' Endpoint' , { endpointConfig });
259
+ const productionVariant = endpoint .findInstanceProductionVariant (' my-variant' );
260
+ productionVariant .metricModelLatency ().createAlarm (this , ' ModelLatencyAlarm' , {
261
+ threshold: 100000 ,
262
+ evaluationPeriods: 3 ,
263
+ });
264
+ ```
0 commit comments