Commit c1511a3

Merge branch 'master' into monitor-batch-transform-step-doc

2 parents bebe700 + 907f4ff
File tree: 19 files changed (+238 -231 lines)

.github/PULL_REQUEST_TEMPLATE.md (+1 -1)

@@ -11,7 +11,7 @@ _Put an `x` in the boxes that apply. You can also fill these out after creating
 #### General

 - [ ] I have read the [CONTRIBUTING](https://github.com/aws/sagemaker-python-sdk/blob/master/CONTRIBUTING.md) doc
-- [ ] I certify that the changes I am introducing will be backword compatible, and I have discussed concerns about this, if any, with the Python SDK team
+- [ ] I certify that the changes I am introducing will be backward compatible, and I have discussed concerns about this, if any, with the Python SDK team
 - [ ] I used the commit message format described in [CONTRIBUTING](https://github.com/aws/sagemaker-python-sdk/blob/master/CONTRIBUTING.md#committing-your-change)
 - [ ] I have passed the region in to all S3 and STS clients that I've initialized as part of this change.
 - [ ] I have updated any necessary documentation, including [READMEs](https://github.com/aws/sagemaker-python-sdk/blob/master/README.rst) and [API docs](https://github.com/aws/sagemaker-python-sdk/tree/master/doc) (if appropriate)

CHANGELOG.md (+27)

@@ -1,5 +1,32 @@
 # Changelog

+## v2.112.2 (2022-10-11)
+
+### Bug Fixes and Other Changes
+
+ * Update Neo-TF2.x versions to TF2.9(.2)
+
+### Documentation Changes
+
+ * fix typo in PR template
+
+## v2.112.1 (2022-10-10)
+
+### Bug Fixes and Other Changes
+
+ * fix(local-mode): loosen docker requirement to allow 6.0.0
+ * CreateModelPackage API error for Scikit-learn and XGBoost frameworks
+
+## v2.112.0 (2022-10-09)
+
+### Features
+
+ * added monitor batch transform step (pipeline)
+
+### Bug Fixes and Other Changes
+
+ * Add PipelineVariable annotation to framework estimators
+
 ## v2.111.0 (2022-10-05)

 ### Features

VERSION (+1 -1)

@@ -1 +1 @@
-2.111.1.dev0
+2.112.3.dev0

+1 -1

@@ -1,4 +1,4 @@
 urllib3==1.26.8
 docker-compose==1.29.2
-docker~=5.0.0
+docker>=5.0.2,<7.0.0
 PyYAML==5.4.1
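The pin above loosens the docker client requirement for local mode from `docker~=5.0.0` to `docker>=5.0.2,<7.0.0`, so docker-py 6.x installs are now accepted. A minimal sketch (not part of the SDK; it assumes the `packaging` library is available) of checking an installed client against the new range:

```python
# Hypothetical check, not part of the SageMaker SDK: confirm the installed
# docker-py client falls inside the loosened ">=5.0.2,<7.0.0" range.
from importlib.metadata import version

from packaging.specifiers import SpecifierSet

installed = version("docker")  # e.g. "6.0.0"
if installed not in SpecifierSet(">=5.0.2,<7.0.0"):
    raise RuntimeError(f"docker {installed} is outside the supported range for local mode")
```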

src/sagemaker/algorithm.py (+66 -45)

@@ -13,15 +13,22 @@
 """Test docstring"""
 from __future__ import absolute_import

+from typing import Optional, Union, Dict, List
+
 import sagemaker
 import sagemaker.parameter
 from sagemaker import vpc_utils
 from sagemaker.deserializers import BytesDeserializer
 from sagemaker.deprecations import removed_kwargs
 from sagemaker.estimator import EstimatorBase
+from sagemaker.inputs import TrainingInput, FileSystemInput
 from sagemaker.serializers import IdentitySerializer
 from sagemaker.transformer import Transformer
 from sagemaker.predictor import Predictor
+from sagemaker.session import Session
+from sagemaker.workflow.entities import PipelineVariable
+
+from sagemaker.workflow import is_pipeline_variable


 class AlgorithmEstimator(EstimatorBase):
@@ -37,28 +44,28 @@ class AlgorithmEstimator(EstimatorBase):

     def __init__(
         self,
-        algorithm_arn,
-        role,
-        instance_count,
-        instance_type,
-        volume_size=30,
-        volume_kms_key=None,
-        max_run=24 * 60 * 60,
-        input_mode="File",
-        output_path=None,
-        output_kms_key=None,
-        base_job_name=None,
-        sagemaker_session=None,
-        hyperparameters=None,
-        tags=None,
-        subnets=None,
-        security_group_ids=None,
-        model_uri=None,
-        model_channel_name="model",
-        metric_definitions=None,
-        encrypt_inter_container_traffic=False,
-        use_spot_instances=False,
-        max_wait=None,
+        algorithm_arn: str,
+        role: str,
+        instance_count: Optional[Union[int, PipelineVariable]] = None,
+        instance_type: Optional[Union[str, PipelineVariable]] = None,
+        volume_size: Union[int, PipelineVariable] = 30,
+        volume_kms_key: Optional[Union[str, PipelineVariable]] = None,
+        max_run: Union[int, PipelineVariable] = 24 * 60 * 60,
+        input_mode: Union[str, PipelineVariable] = "File",
+        output_path: Optional[Union[str, PipelineVariable]] = None,
+        output_kms_key: Optional[Union[str, PipelineVariable]] = None,
+        base_job_name: Optional[str] = None,
+        sagemaker_session: Optional[Session] = None,
+        hyperparameters: Optional[Dict[str, Union[str, PipelineVariable]]] = None,
+        tags: Optional[List[Dict[str, Union[str, PipelineVariable]]]] = None,
+        subnets: Optional[List[Union[str, PipelineVariable]]] = None,
+        security_group_ids: Optional[List[Union[str, PipelineVariable]]] = None,
+        model_uri: Optional[str] = None,
+        model_channel_name: Union[str, PipelineVariable] = "model",
+        metric_definitions: Optional[List[Dict[str, Union[str, PipelineVariable]]]] = None,
+        encrypt_inter_container_traffic: Union[bool, PipelineVariable] = False,
+        use_spot_instances: Union[bool, PipelineVariable] = False,
+        max_wait: Optional[Union[int, PipelineVariable]] = None,
         **kwargs  # pylint: disable=W0613
     ):
         """Initialize an ``AlgorithmEstimator`` instance.
@@ -71,18 +78,21 @@ def __init__(
                 access training data and model artifacts. After the endpoint
                 is created, the inference code might use the IAM role, if it
                 needs to access an AWS resource.
-            instance_count (int): Number of Amazon EC2 instances to use for training.
-            instance_type (str): Type of EC2 instance to use for training, for example, 'ml.c4.xlarge'.
-            volume_size (int): Size in GB of the EBS volume to use for
+            instance_count (int or PipelineVariable): Number of Amazon EC2 instances to use
+                for training.
+            instance_type (str or PipelineVariable): Type of EC2 instance to use for training,
+                for example, 'ml.c4.xlarge'.
+            volume_size (int or PipelineVariable): Size in GB of the EBS volume to use for
                 storing input data during training (default: 30). Must be large enough to store
                 training data if File Mode is used (which is the default).
-            volume_kms_key (str): Optional. KMS key ID for encrypting EBS volume attached
-                to the training instance (default: None).
-            max_run (int): Timeout in seconds for training (default: 24 * 60 * 60).
+            volume_kms_key (str or PipelineVariable): Optional. KMS key ID for encrypting
+                EBS volume attached to the training instance (default: None).
+            max_run (int or PipelineVariable): Timeout in seconds for training
+                (default: 24 * 60 * 60).
                 After this amount of time Amazon SageMaker terminates the
                 job regardless of its current status.
-            input_mode (str): The input mode that the algorithm supports
-                (default: 'File'). Valid modes:
+            input_mode (str or PipelineVariable): The input mode that the algorithm supports
+                (default: 'File'). Valid modes:

                 * 'File' - Amazon SageMaker copies the training dataset from
                   the S3 location to a local directory.
@@ -92,13 +102,14 @@ def __init__(
                 This argument can be overriden on a per-channel basis using
                 ``sagemaker.inputs.TrainingInput.input_mode``.

-            output_path (str): S3 location for saving the training result (model artifacts and
-                output files). If not specified, results are stored to a default bucket. If
+            output_path (str or PipelineVariable): S3 location for saving the training result
+                (model artifacts and output files). If not specified,
+                results are stored to a default bucket. If
                 the bucket with the specific name does not exist, the
                 estimator creates the bucket during the
                 :meth:`~sagemaker.estimator.EstimatorBase.fit` method
                 execution.
-            output_kms_key (str): Optional. KMS key ID for encrypting the
+            output_kms_key (str or PipelineVariable): Optional. KMS key ID for encrypting the
                 training output (default: None). base_job_name (str): Prefix for
                 training job name when the
                 :meth:`~sagemaker.estimator.EstimatorBase.fit`
@@ -109,9 +120,10 @@ def __init__(
                 interactions with Amazon SageMaker APIs and any other AWS services needed. If
                 not specified, the estimator creates one using the default
                 AWS configuration chain.
-            tags (list[dict]): List of tags for labeling a training job. For more, see
+            tags (list[dict[str, str] or list[dict[str, PipelineVariable]]): List of tags for
+                labeling a training job. For more, see
                 https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html.
-            subnets (list[str]): List of subnet ids. If not specified
+            subnets (list[str] or list[PipelineVariable]): List of subnet ids. If not specified
                 training job will be created without VPC config.
             security_group_ids (list[str]): List of security group ids. If
                 not specified training job will be created without VPC config.
@@ -122,22 +134,22 @@ def __init__(
                 other artifacts coming from a different source.
                 More information:
                 https://docs.aws.amazon.com/sagemaker/latest/dg/cdf-training.html#td-deserialization
-            model_channel_name (str): Name of the channel where 'model_uri'
+            model_channel_name (str or PipelineVariable): Name of the channel where 'model_uri'
                 will be downloaded (default: 'model'). metric_definitions
                 (list[dict]): A list of dictionaries that defines the metric(s)
                 used to evaluate the training jobs. Each dictionary contains two keys: 'Name' for
                 the name of the metric, and 'Regex' for the regular
                 expression used to extract the metric from the logs.
-            encrypt_inter_container_traffic (bool): Specifies whether traffic between training
-                containers is encrypted for the training job (default: ``False``).
-            use_spot_instances (bool): Specifies whether to use SageMaker
+            encrypt_inter_container_traffic (bool or PipelineVariable): Specifies whether traffic
+                between training containers is encrypted for the training job (default: ``False``).
+            use_spot_instances (bool or PipelineVariable): Specifies whether to use SageMaker
                 Managed Spot instances for training. If enabled then the
                 `max_wait` arg should also be set.

                 More information:
                 https://docs.aws.amazon.com/sagemaker/latest/dg/model-managed-spot-training.html
                 (default: ``False``).
-            max_wait (int): Timeout in seconds waiting for spot training
+            max_wait (int or PipelineVariable): Timeout in seconds waiting for spot training
                 instances (default: None). After this amount of time Amazon
                 SageMaker will stop waiting for Spot instances to become
                 available (default: ``None``).
@@ -186,22 +198,25 @@ def validate_train_spec(self):
         # Check that the input mode provided is compatible with the training input modes for the
         # algorithm.
         input_modes = self._algorithm_training_input_modes(train_spec["TrainingChannels"])
-        if self.input_mode not in input_modes:
+        if not is_pipeline_variable(self.input_mode) and self.input_mode not in input_modes:
             raise ValueError(
                 "Invalid input mode: %s. %s only supports: %s"
                 % (self.input_mode, algorithm_name, input_modes)
             )

         # Check that the training instance type is compatible with the algorithm.
         supported_instances = train_spec["SupportedTrainingInstanceTypes"]
-        if self.instance_type not in supported_instances:
+        if (
+            not is_pipeline_variable(self.instance_type)
+            and self.instance_type not in supported_instances
+        ):
             raise ValueError(
                 "Invalid instance_type: %s. %s supports the following instance types: %s"
                 % (self.instance_type, algorithm_name, supported_instances)
             )

         # Verify if distributed training is supported by the algorithm
-        if (
+        if not is_pipeline_variable(self.instance_count) and (
             self.instance_count > 1
             and "SupportsDistributedTraining" in train_spec
             and not train_spec["SupportsDistributedTraining"]
@@ -414,12 +429,18 @@ def _prepare_for_training(self, job_name=None):

         super(AlgorithmEstimator, self)._prepare_for_training(job_name)

-    def fit(self, inputs=None, wait=True, logs=True, job_name=None):
+    def fit(
+        self,
+        inputs: Optional[Union[str, Dict, TrainingInput, FileSystemInput]] = None,
+        wait: bool = True,
+        logs: bool = True,
+        job_name: Optional[str] = None,
+    ):
         """Placeholder docstring"""
         if inputs:
             self._validate_input_channels(inputs)

-        super(AlgorithmEstimator, self).fit(inputs, wait, logs, job_name)
+        return super(AlgorithmEstimator, self).fit(inputs, wait, logs, job_name)

     def _validate_input_channels(self, channels):
         """Placeholder docstring"""

src/sagemaker/chainer/estimator.py (+20 -20)

@@ -14,7 +14,7 @@
 from __future__ import absolute_import

 import logging
-from typing import Union, Optional
+from typing import Union, Optional, Dict

 from sagemaker.estimator import Framework, EstimatorBase
 from sagemaker.fw_utils import (
@@ -34,26 +34,26 @@
 class Chainer(Framework):
     """Handle end-to-end training and deployment of custom Chainer code."""

-    _framework_name = "chainer"
+    _framework_name: str = "chainer"

     # Hyperparameters
-    _use_mpi = "sagemaker_use_mpi"
-    _num_processes = "sagemaker_num_processes"
-    _process_slots_per_host = "sagemaker_process_slots_per_host"
-    _additional_mpi_options = "sagemaker_additional_mpi_options"
+    _use_mpi: str = "sagemaker_use_mpi"
+    _num_processes: str = "sagemaker_num_processes"
+    _process_slots_per_host: str = "sagemaker_process_slots_per_host"
+    _additional_mpi_options: str = "sagemaker_additional_mpi_options"

     def __init__(
         self,
         entry_point: Union[str, PipelineVariable],
-        use_mpi=None,
-        num_processes=None,
-        process_slots_per_host=None,
-        additional_mpi_options=None,
+        use_mpi: Optional[Union[bool, PipelineVariable]] = None,
+        num_processes: Optional[Union[int, PipelineVariable]] = None,
+        process_slots_per_host: Optional[Union[int, PipelineVariable]] = None,
+        additional_mpi_options: Optional[Union[str, PipelineVariable]] = None,
         source_dir: Optional[Union[str, PipelineVariable]] = None,
-        hyperparameters=None,
-        framework_version=None,
-        py_version=None,
-        image_uri=None,
+        hyperparameters: Optional[Dict[str, Union[str, PipelineVariable]]] = None,
+        framework_version: Optional[str] = None,
+        py_version: Optional[str] = None,
+        image_uri: Optional[Union[str, PipelineVariable]] = None,
         **kwargs
     ):
         """This ``Estimator`` executes an Chainer script in a managed execution environment.
@@ -78,26 +78,26 @@ def __init__(
                 file which should be executed as the entry point to training.
                 If ``source_dir`` is specified, then ``entry_point``
                 must point to a file located at the root of ``source_dir``.
-            use_mpi (bool): If true, entry point is run as an MPI script. By
+            use_mpi (bool or PipelineVariable): If true, entry point is run as an MPI script. By
                 default, the Chainer Framework runs the entry point with
                 'mpirun' if more than one instance is used.
-            num_processes (int): Total number of processes to run the entry
+            num_processes (int or PipelineVariable): Total number of processes to run the entry
                 point with. By default, the Chainer Framework runs one process
                 per GPU (on GPU instances), or one process per host (on CPU
                 instances).
-            process_slots_per_host (int): The number of processes that can run
+            process_slots_per_host (int or PipelineVariable): The number of processes that can run
                 on each instance. By default, this is set to the number of GPUs
                 on the instance (on GPU instances), or one (on CPU instances).
-            additional_mpi_options (str): String of options to the 'mpirun'
+            additional_mpi_options (str or PipelineVariable): String of options to the 'mpirun'
                 command used to run the entry point. For example, '-X
                 NCCL_DEBUG=WARN' will pass that option string to the mpirun
                 command.
             source_dir (str or PipelineVariable): Path (absolute or relative) to a directory with
                 any other training source code dependencies aside from the entry
                 point file (default: None). Structure within this directory are
                 preserved when training on Amazon SageMaker.
-            hyperparameters (dict): Hyperparameters that will be used for
-                training (default: None). The hyperparameters are made
+            hyperparameters (dict[str, str] or dict[str, PipelineVariable]): Hyperparameters
+                that will be used for training (default: None). The hyperparameters are made
                 accessible as a dict[str, str] to the training code on
                 SageMaker. For convenience, this accepts other types for keys
                 and values, but ``str()`` will be called to convert them before
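With these annotations, the Chainer constructor arguments typed as `Union[..., PipelineVariable]` can be wired to pipeline parameters instead of literals. A minimal sketch (the role ARN, script name, and parameter names are placeholders, not values from this change):

```python
# Sketch only: pass pipeline parameters where the new annotations allow
# Union[..., PipelineVariable]; they resolve when the pipeline executes.
from sagemaker.chainer import Chainer
from sagemaker.workflow.parameters import ParameterInteger, ParameterString

training_image = ParameterString(name="TrainingImage")          # hypothetical parameter
num_processes = ParameterInteger(name="NumProcesses", default_value=2)

estimator = Chainer(
    entry_point="train.py",                                      # placeholder script
    role="arn:aws:iam::123456789012:role/SageMakerRole",         # placeholder role ARN
    instance_count=1,
    instance_type="ml.c5.xlarge",
    image_uri=training_image,   # PipelineVariable accepted via the new annotation
    num_processes=num_processes,
    use_mpi=True,
)
```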

src/sagemaker/image_uri_config/neo-tensorflow.json (+8 -5)

@@ -13,10 +13,13 @@
         "1.12.0": "1.15.3",
         "1.13.0": "1.15.3",
         "1.14.0": "1.15.3",
-        "2.4": "2.4.2",
-        "2.4.0": "2.4.2",
-        "2.4.1": "2.4.2",
-        "2.4.2": "2.4.2"
+        "2.4": "2.9.2",
+        "2.4.0": "2.9.2",
+        "2.4.1": "2.9.2",
+        "2.4.2": "2.9.2",
+        "2.9": "2.9.2",
+        "2.9.0": "2.9.2",
+        "2.9.2": "2.9.2"
     },
     "versions": {
         "1.15.3": {
@@ -49,7 +52,7 @@
             },
             "repository": "sagemaker-inference-tensorflow"
         },
-        "2.4.2": {
+        "2.9.2": {
            "py_versions": ["py3"],
            "registries": {
                "af-south-1": "774647643957",
