Skip to content

Commit 3e48d79

Browse files
author
AWS
committed
Amazon SageMaker Service Update: This change allows customers to enable data capturing while running a batch transform job, and configure a monitoring schedule to monitor the captured data.
1 parent 6442c6f commit 3e48d79

File tree

2 files changed

+156
-13
lines changed

2 files changed

+156
-13
lines changed
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"type": "feature",
3+
"category": "Amazon SageMaker Service",
4+
"contributor": "",
5+
"description": "This change allows customers to enable data capturing while running a batch transform job, and configure monitoring schedule to monitoring the captured data."
6+
}

services/sagemaker/src/main/resources/codegen-resources/service-2.json

Lines changed: 150 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4662,6 +4662,25 @@
46624662
"AWS/Textract/AnalyzeDocument/Forms/V1"
46634663
]
46644664
},
4665+
"BatchDataCaptureConfig":{
4666+
"type":"structure",
4667+
"required":["DestinationS3Uri"],
4668+
"members":{
4669+
"DestinationS3Uri":{
4670+
"shape":"S3Uri",
4671+
"documentation":"<p>The Amazon S3 location being used to capture the data.</p>"
4672+
},
4673+
"KmsKeyId":{
4674+
"shape":"KmsKeyId",
4675+
"documentation":"<p>The Amazon Resource Name (ARN) of a Amazon Web Services Key Management Service key that SageMaker uses to encrypt data on the storage volume attached to the ML compute instance that hosts the batch transform job.</p> <p>The KmsKeyId can be any of the following formats: </p> <ul> <li> <p>Key ID: <code>1234abcd-12ab-34cd-56ef-1234567890ab</code> </p> </li> <li> <p>Key ARN: <code>arn:aws:kms:us-west-2:111122223333:key/1234abcd-12ab-34cd-56ef-1234567890ab</code> </p> </li> <li> <p>Alias name: <code>alias/ExampleAlias</code> </p> </li> <li> <p>Alias name ARN: <code>arn:aws:kms:us-west-2:111122223333:alias/ExampleAlias</code> </p> </li> </ul>"
4676+
},
4677+
"GenerateInferenceId":{
4678+
"shape":"Boolean",
4679+
"documentation":"<p>Flag that indicates whether to append inference id to the output.</p>"
4680+
}
4681+
},
4682+
"documentation":"<p>Configuration to control how SageMaker captures inference data for batch transform jobs.</p>"
4683+
},
46654684
"BatchDescribeModelPackageError":{
46664685
"type":"structure",
46674686
"required":[
@@ -4757,6 +4776,61 @@
47574776
"SingleRecord"
47584777
]
47594778
},
4779+
"BatchTransformInput":{
4780+
"type":"structure",
4781+
"required":[
4782+
"DataCapturedDestinationS3Uri",
4783+
"DatasetFormat",
4784+
"LocalPath"
4785+
],
4786+
"members":{
4787+
"DataCapturedDestinationS3Uri":{
4788+
"shape":"DestinationS3Uri",
4789+
"documentation":"<p>The Amazon S3 location being used to capture the data.</p>"
4790+
},
4791+
"DatasetFormat":{
4792+
"shape":"MonitoringDatasetFormat",
4793+
"documentation":"<p>The dataset format for your batch transform job.</p>"
4794+
},
4795+
"LocalPath":{
4796+
"shape":"ProcessingLocalPath",
4797+
"documentation":"<p>Path to the filesystem where the batch transform data is available to the container.</p>"
4798+
},
4799+
"S3InputMode":{
4800+
"shape":"ProcessingS3InputMode",
4801+
"documentation":"<p>Whether the <code>Pipe</code> or <code>File</code> is used as the input mode for transferring data for the monitoring job. <code>Pipe</code> mode is recommended for large datasets. <code>File</code> mode is useful for small files that fit in memory. Defaults to <code>File</code>.</p>"
4802+
},
4803+
"S3DataDistributionType":{
4804+
"shape":"ProcessingS3DataDistributionType",
4805+
"documentation":"<p>Whether input data distributed in Amazon S3 is fully replicated or sharded by an S3 key. Defaults to <code>FullyReplicated</code> </p>"
4806+
},
4807+
"FeaturesAttribute":{
4808+
"shape":"String",
4809+
"documentation":"<p>The attributes of the input data that are the input features.</p>"
4810+
},
4811+
"InferenceAttribute":{
4812+
"shape":"String",
4813+
"documentation":"<p>The attribute of the input data that represents the ground truth label.</p>"
4814+
},
4815+
"ProbabilityAttribute":{
4816+
"shape":"String",
4817+
"documentation":"<p>In a classification problem, the attribute that represents the class probability.</p>"
4818+
},
4819+
"ProbabilityThresholdAttribute":{
4820+
"shape":"ProbabilityThresholdAttribute",
4821+
"documentation":"<p>The threshold for the class probability to be evaluated as a positive result.</p>"
4822+
},
4823+
"StartTimeOffset":{
4824+
"shape":"MonitoringTimeOffsetString",
4825+
"documentation":"<p>If specified, monitoring jobs substract this time from the start time. For information about using offsets for scheduling monitoring jobs, see <a href=\"https://docs.aws.amazon.com/sagemaker/latest/dg/model-monitor-model-quality-schedule.html\">Schedule Model Quality Monitoring Jobs</a>.</p>"
4826+
},
4827+
"EndTimeOffset":{
4828+
"shape":"MonitoringTimeOffsetString",
4829+
"documentation":"<p>If specified, monitoring jobs substract this time from the end time. For information about using offsets for scheduling monitoring jobs, see <a href=\"https://docs.aws.amazon.com/sagemaker/latest/dg/model-monitor-model-quality-schedule.html\">Schedule Model Quality Monitoring Jobs</a>.</p>"
4830+
}
4831+
},
4832+
"documentation":"<p>Input object for the batch transform job.</p>"
4833+
},
47604834
"Bias":{
47614835
"type":"structure",
47624836
"members":{
@@ -8060,6 +8134,10 @@
80608134
"shape":"TransformOutput",
80618135
"documentation":"<p>Describes the results of the transform job.</p>"
80628136
},
8137+
"DataCaptureConfig":{
8138+
"shape":"BatchDataCaptureConfig",
8139+
"documentation":"<p>Configuration to control how SageMaker captures inference data.</p>"
8140+
},
80638141
"TransformResources":{
80648142
"shape":"TransformResources",
80658143
"documentation":"<p>Describes the resources, including ML instance types and ML instance count, to use for the transform job.</p>"
@@ -8529,9 +8607,12 @@
85298607
},
85308608
"DataQualityJobInput":{
85318609
"type":"structure",
8532-
"required":["EndpointInput"],
85338610
"members":{
8534-
"EndpointInput":{"shape":"EndpointInput"}
8611+
"EndpointInput":{"shape":"EndpointInput"},
8612+
"BatchTransformInput":{
8613+
"shape":"BatchTransformInput",
8614+
"documentation":"<p>Input object for the batch transform job.</p>"
8615+
}
85358616
},
85368617
"documentation":"<p>The input for the data quality monitoring job. Currently endpoints are supported for input.</p>"
85378618
},
@@ -12458,6 +12539,10 @@
1245812539
"shape":"TransformOutput",
1245912540
"documentation":"<p>Identifies the Amazon S3 location where you want Amazon SageMaker to save the results from the transform job.</p>"
1246012541
},
12542+
"DataCaptureConfig":{
12543+
"shape":"BatchDataCaptureConfig",
12544+
"documentation":"<p>Configuration to control how SageMaker captures inference data.</p>"
12545+
},
1246112546
"TransformResources":{
1246212547
"shape":"TransformResources",
1246312548
"documentation":"<p>Describes the resources, including ML instance types and ML instance count, to use for the transform job.</p>"
@@ -20328,12 +20413,13 @@
2032820413
},
2032920414
"ModelBiasJobInput":{
2033020415
"type":"structure",
20331-
"required":[
20332-
"EndpointInput",
20333-
"GroundTruthS3Input"
20334-
],
20416+
"required":["GroundTruthS3Input"],
2033520417
"members":{
2033620418
"EndpointInput":{"shape":"EndpointInput"},
20419+
"BatchTransformInput":{
20420+
"shape":"BatchTransformInput",
20421+
"documentation":"<p>Input object for the batch transform job.</p>"
20422+
},
2033720423
"GroundTruthS3Input":{
2033820424
"shape":"MonitoringGroundTruthS3Input",
2033920425
"documentation":"<p>Location of ground truth labels to use in model bias job.</p>"
@@ -20459,9 +20545,12 @@
2045920545
},
2046020546
"ModelExplainabilityJobInput":{
2046120547
"type":"structure",
20462-
"required":["EndpointInput"],
2046320548
"members":{
20464-
"EndpointInput":{"shape":"EndpointInput"}
20549+
"EndpointInput":{"shape":"EndpointInput"},
20550+
"BatchTransformInput":{
20551+
"shape":"BatchTransformInput",
20552+
"documentation":"<p>Input object for the batch transform job.</p>"
20553+
}
2046520554
},
2046620555
"documentation":"<p>Inputs for the model explainability job.</p>"
2046720556
},
@@ -21105,12 +21194,13 @@
2110521194
},
2110621195
"ModelQualityJobInput":{
2110721196
"type":"structure",
21108-
"required":[
21109-
"EndpointInput",
21110-
"GroundTruthS3Input"
21111-
],
21197+
"required":["GroundTruthS3Input"],
2111221198
"members":{
2111321199
"EndpointInput":{"shape":"EndpointInput"},
21200+
"BatchTransformInput":{
21201+
"shape":"BatchTransformInput",
21202+
"documentation":"<p>Input object for the batch transform job.</p>"
21203+
},
2111421204
"GroundTruthS3Input":{
2111521205
"shape":"MonitoringGroundTruthS3Input",
2111621206
"documentation":"<p>The ground truth label provided for the model.</p>"
@@ -21250,6 +21340,34 @@
2125021340
"max":50,
2125121341
"min":1
2125221342
},
21343+
"MonitoringCsvDatasetFormat":{
21344+
"type":"structure",
21345+
"members":{
21346+
"Header":{
21347+
"shape":"Boolean",
21348+
"documentation":"<p>Indicates if the CSV data has a header.</p>"
21349+
}
21350+
},
21351+
"documentation":"<p>Represents the CSV dataset format used when running a monitoring job.</p>"
21352+
},
21353+
"MonitoringDatasetFormat":{
21354+
"type":"structure",
21355+
"members":{
21356+
"Csv":{
21357+
"shape":"MonitoringCsvDatasetFormat",
21358+
"documentation":"<p>The CSV dataset used in the monitoring job.</p>"
21359+
},
21360+
"Json":{
21361+
"shape":"MonitoringJsonDatasetFormat",
21362+
"documentation":"<p>The JSON dataset used in the monitoring job</p>"
21363+
},
21364+
"Parquet":{
21365+
"shape":"MonitoringParquetDatasetFormat",
21366+
"documentation":"<p>The Parquet dataset used in the monitoring job</p>"
21367+
}
21368+
},
21369+
"documentation":"<p>Represents the dataset format used when running a monitoring job.</p>"
21370+
},
2125321371
"MonitoringEnvironmentMap":{
2125421372
"type":"map",
2125521373
"key":{"shape":"ProcessingEnvironmentKey"},
@@ -21333,11 +21451,14 @@
2133321451
},
2133421452
"MonitoringInput":{
2133521453
"type":"structure",
21336-
"required":["EndpointInput"],
2133721454
"members":{
2133821455
"EndpointInput":{
2133921456
"shape":"EndpointInput",
2134021457
"documentation":"<p>The endpoint for a monitoring job.</p>"
21458+
},
21459+
"BatchTransformInput":{
21460+
"shape":"BatchTransformInput",
21461+
"documentation":"<p>Input object for the batch transform job.</p>"
2134121462
}
2134221463
},
2134321464
"documentation":"<p>The inputs for a monitoring job.</p>"
@@ -21447,6 +21568,16 @@
2144721568
"type":"list",
2144821569
"member":{"shape":"MonitoringJobDefinitionSummary"}
2144921570
},
21571+
"MonitoringJsonDatasetFormat":{
21572+
"type":"structure",
21573+
"members":{
21574+
"Line":{
21575+
"shape":"Boolean",
21576+
"documentation":"<p>Indicates if the file should be read as a json object per line. </p>"
21577+
}
21578+
},
21579+
"documentation":"<p>Represents the JSON dataset format used when running a monitoring job.</p>"
21580+
},
2145021581
"MonitoringMaxRuntimeInSeconds":{
2145121582
"type":"integer",
2145221583
"max":86400,
@@ -21499,6 +21630,12 @@
2149921630
"max":1,
2150021631
"min":1
2150121632
},
21633+
"MonitoringParquetDatasetFormat":{
21634+
"type":"structure",
21635+
"members":{
21636+
},
21637+
"documentation":"<p>Represents the Parquet dataset format used when running a monitoring job.</p>"
21638+
},
2150221639
"MonitoringProblemType":{
2150321640
"type":"string",
2150421641
"enum":[

0 commit comments

Comments
 (0)