
Commit 7b2dd45

Authored and committed by AWS
Amazon SageMaker Service Update: The CreateInferenceRecommenderjob API now supports passing endpoint details directly. This helps customers identify the maximum invocations and maximum latency they can achieve for their model on the associated endpoint, in addition to getting recommendations for other instance types.
1 parent 8f889eb

File tree: 3 files changed (+70 −2 lines)
Lines changed: 6 additions & 0 deletions
@@ -0,0 +1,6 @@
+{
+    "type": "feature",
+    "category": "Amazon SageMaker Service",
+    "contributor": "",
+    "description": "CreateInferenceRecommenderjob API now supports passing endpoint details directly, that will help customers to identify the max invocation and max latency they can achieve for their model and the associated endpoint along with getting recommendations on other instances."
+}
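
For orientation, a minimal, hedged sketch of how the new field might be exercised through the AWS SDK for Java v2. It assumes the generated names follow the usual codegen conventions (SageMakerClient, CreateInferenceRecommendationsJobRequest, RecommendationJobInputConfig, EndpointInfo); the job name, role ARN, model package ARN, and endpoint name are placeholders:

import software.amazon.awssdk.services.sagemaker.SageMakerClient;
import software.amazon.awssdk.services.sagemaker.model.CreateInferenceRecommendationsJobRequest;
import software.amazon.awssdk.services.sagemaker.model.EndpointInfo;
import software.amazon.awssdk.services.sagemaker.model.RecommendationJobInputConfig;
import software.amazon.awssdk.services.sagemaker.model.RecommendationJobType;

public class RecommenderWithExistingEndpoint {
    public static void main(String[] args) {
        try (SageMakerClient sageMaker = SageMakerClient.create()) {
            // The new Endpoints field points the job at an existing endpoint,
            // in addition to the usual model-package-driven recommendations.
            RecommendationJobInputConfig inputConfig = RecommendationJobInputConfig.builder()
                    .modelPackageVersionArn(
                            "arn:aws:sagemaker:us-west-2:123456789012:model-package/my-package/1") // placeholder
                    .endpoints(EndpointInfo.builder()
                            .endpointName("my-existing-endpoint") // placeholder
                            .build())
                    .build();

            sageMaker.createInferenceRecommendationsJob(
                    CreateInferenceRecommendationsJobRequest.builder()
                            .jobName("recommender-job-with-endpoint") // placeholder
                            .jobType(RecommendationJobType.DEFAULT)
                            .roleArn("arn:aws:iam::123456789012:role/SageMakerRole") // placeholder
                            .inputConfig(inputConfig)
                            .build());
        }
    }
}

Note that, per the Endpoints list shape added in service-2.json below ("max":1), a job can currently target at most one existing endpoint.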

services/sagemaker/src/main/resources/codegen-resources/endpoint-rule-set.json

Lines changed: 1 addition & 1 deletion
@@ -3,7 +3,7 @@
     "parameters": {
         "Region": {
             "builtIn": "AWS::Region",
-            "required": true,
+            "required": false,
             "documentation": "The AWS region used to dispatch the request.",
             "type": "String"
         },
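
A side note on this change: with Region no longer marked required in the ruleset, the parameter can presumably be left unbound and resolved from the SDK's default region provider chain, as in this sketch:

import software.amazon.awssdk.services.sagemaker.SageMakerClient;

public class DefaultRegionClient {
    public static void main(String[] args) {
        // No explicit region: the SDK v2 default region provider chain
        // (environment variables, profile, instance metadata) resolves it.
        try (SageMakerClient sageMaker = SageMakerClient.create()) {
            System.out.println(sageMaker.serviceName());
        }
    }
}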

services/sagemaker/src/main/resources/codegen-resources/service-2.json

Lines changed: 63 additions & 1 deletion
@@ -11187,6 +11187,10 @@
         "InferenceRecommendations":{
           "shape":"InferenceRecommendations",
           "documentation":"<p>The recommendations made by Inference Recommender.</p>"
+        },
+        "EndpointPerformances":{
+          "shape":"EndpointPerformances",
+          "documentation":"<p>The performance results from running an Inference Recommender job on an existing endpoint.</p>"
         }
       }
     },
@@ -13893,6 +13897,17 @@
       "type":"list",
       "member":{"shape":"EndpointConfigSummary"}
     },
+    "EndpointInfo":{
+      "type":"structure",
+      "required":["EndpointName"],
+      "members":{
+        "EndpointName":{
+          "shape":"EndpointName",
+          "documentation":"<p>The name of a customer's endpoint.</p>"
+        }
+      },
+      "documentation":"<p>Details about a customer endpoint that was compared in an Inference Recommender job.</p>"
+    },
     "EndpointInput":{
       "type":"structure",
       "required":[
@@ -14006,6 +14021,26 @@
       },
       "documentation":"<p>The endpoint configuration made by Inference Recommender during a recommendation job.</p>"
     },
+    "EndpointPerformance":{
+      "type":"structure",
+      "required":[
+        "Metrics",
+        "EndpointInfo"
+      ],
+      "members":{
+        "Metrics":{
+          "shape":"InferenceMetrics",
+          "documentation":"<p>The metrics for an existing endpoint.</p>"
+        },
+        "EndpointInfo":{"shape":"EndpointInfo"}
+      },
+      "documentation":"<p>The performance results from running an Inference Recommender job on an existing endpoint.</p>"
+    },
+    "EndpointPerformances":{
+      "type":"list",
+      "member":{"shape":"EndpointPerformance"},
+      "max":1
+    },
     "EndpointSortKey":{
       "type":"string",
       "enum":[
@@ -14064,6 +14099,11 @@
       "type":"list",
       "member":{"shape":"EndpointSummary"}
     },
+    "Endpoints":{
+      "type":"list",
+      "member":{"shape":"EndpointInfo"},
+      "max":1
+    },
     "EntityDescription":{
       "type":"string",
       "max":1024,
@@ -16115,6 +16155,24 @@
       "type":"string",
       "max":256
     },
+    "InferenceMetrics":{
+      "type":"structure",
+      "required":[
+        "MaxInvocations",
+        "ModelLatency"
+      ],
+      "members":{
+        "MaxInvocations":{
+          "shape":"Integer",
+          "documentation":"<p>The expected maximum number of requests per minute for the instance.</p>"
+        },
+        "ModelLatency":{
+          "shape":"Integer",
+          "documentation":"<p>The expected model latency at maximum invocations per minute for the instance.</p>"
+        }
+      },
+      "documentation":"<p>The metrics for an existing endpoint compared in an Inference Recommender job.</p>"
+    },
     "InferenceRecommendation":{
       "type":"structure",
       "required":[
@@ -24623,6 +24681,10 @@
       "ContainerConfig":{
         "shape":"RecommendationJobContainerConfig",
         "documentation":"<p>Specifies mandatory fields for running an Inference Recommender job. The fields specified in <code>ContainerConfig</code> override the corresponding fields in the model package.</p>"
+      },
+      "Endpoints":{
+        "shape":"Endpoints",
+        "documentation":"<p>Existing customer endpoints on which to run an Inference Recommender job.</p>"
       }
     },
     "documentation":"<p>The input configuration of the recommendation job.</p>"
@@ -27345,7 +27407,7 @@
       },
       "InstanceCount":{
         "shape":"TransformInstanceCount",
-        "documentation":"<p>The number of ML compute instances to use in the transform job. For distributed transform jobs, specify a value greater than 1. The default value is <code>1</code>.</p>"
+        "documentation":"<p>The number of ML compute instances to use in the transform job. The default value is <code>1</code>, and the maximum is <code>100</code>. For distributed transform jobs, specify a value greater than <code>1</code>.</p>"
       },
       "VolumeKmsKeyId":{
         "shape":"KmsKeyId",
