Skip to content

Commit a7df911

Browse files
authored
Merge branch 'master' into mask-creds-local-mode
2 parents 0e24c9e + 9b86920 commit a7df911

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

55 files changed

+372
-197
lines changed

CHANGELOG.md

+44
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,49 @@
11
# Changelog
22

3+
## v2.25.1 (2021-02-20)
4+
5+
### Bug Fixes and Other Changes
6+
7+
* Add tests for visualizer to improve test coverage
8+
9+
### Documentation Changes
10+
11+
* specify correct return type
12+
13+
### Testing and Release Infrastructure
14+
15+
* rename canary_quick pytest mark to release
16+
17+
## v2.25.0 (2021-02-19)
18+
19+
### Features
20+
21+
* Enable step caching
22+
* Add other Neo supported regions for Inferentia inference images
23+
24+
### Bug Fixes and Other Changes
25+
26+
* remove FailStep from pipelines
27+
* use sagemaker_session in workflow tests
28+
* use ECR public for multidatamodel tests
29+
* add the mapping from py3 to cuda11 images
30+
* Add 30s cap time for tag tests
31+
* add build spec for slow tests
32+
* mark top 10 slow tests
33+
* remove slow test_run_xxx_monitor_baseline tests
34+
* pin astroid to 2.4.2
35+
36+
### Testing and Release Infrastructure
37+
38+
* unmark more flaky integ tests
39+
* remove canary_quick pytest mark from flaky/unnecessary tests
40+
* remove python3.8 from buildspec
41+
* remove py38 tox env
42+
* fix release buildspec typo
43+
* unblock regional release builds
44+
* lower test TPS for experiment analytics
45+
* move package preparation and publishing to the deploy step
46+
347
## v2.24.5 (2021-02-12)
448

549
### Bug Fixes and Other Changes

VERSION

+1-1
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
2.24.6.dev0
1+
2.25.2.dev0

buildspec-localmodetests.yml

+2-2
Original file line numberDiff line numberDiff line change
@@ -11,5 +11,5 @@ phases:
1111

1212
# local mode tests
1313
- start_time=`date +%s`
14-
- execute-command-if-has-matching-changes "tox -e py38 -- tests/integ -m local_mode --durations 50" "tests/integ" "tests/data" "tests/conftest.py" "tests/__init__.py" "src/*.py" "setup.py" "setup.cfg" "buildspec-localmodetests.yml"
15-
- ./ci-scripts/displaytime.sh 'py38 local mode' $start_time
14+
- execute-command-if-has-matching-changes "tox -e py37 -- tests/integ -m local_mode --durations 50" "tests/integ" "tests/data" "tests/conftest.py" "tests/__init__.py" "src/*.py" "setup.py" "setup.cfg" "buildspec-localmodetests.yml"
15+
- ./ci-scripts/displaytime.sh 'py37 local mode' $start_time

buildspec-release.yml

+2-2
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ phases:
1515
# run unit tests
1616
- AWS_ACCESS_KEY_ID= AWS_SECRET_ACCESS_KEY= AWS_SESSION_TOKEN=
1717
AWS_CONTAINER_CREDENTIALS_RELATIVE_URI= AWS_DEFAULT_REGION=
18-
tox -e py36,py37,py38 -- tests/unit
18+
tox -e py36,py37 -- tests/unit
1919

2020
# run a subset of the integration tests
21-
- IGNORE_COVERAGE=- tox -e py36 -- tests/integ -m canary_quick -n 64 --boxed --reruns 2
21+
- IGNORE_COVERAGE=- tox -e py36 -- tests/integ -m "not (local_mode or slow_test)" -n 32 --boxed --reruns 2

buildspec-slowtests.yml

+6-2
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,15 @@
11
version: 0.2
22

33
phases:
4+
pre_build:
5+
commands:
6+
- start-dockerd
7+
48
build:
59
commands:
610
- IGNORE_COVERAGE=-
711

812
# slow tests
913
- start_time=`date +%s`
10-
- execute-command-if-has-matching-changes "tox -e py38 -- tests/integ -m slow_test -n 10 --durations 0" "tests/integ" "tests/data" "tests/conftest.py" "tests/__init__.py" "src/*.py" "setup.py" "setup.cfg" "buildspec-slowtests.yml"
11-
- ./ci-scripts/displaytime.sh 'py38 slow tests' $start_time
14+
- execute-command-if-has-matching-changes "tox -e py37 -- tests/integ -m slow_test -n 16 --durations 0" "tests/integ" "tests/data" "tests/conftest.py" "tests/__init__.py" "src/*.py" "setup.py" "setup.cfg" "buildspec-slowtests.yml"
15+
- ./ci-scripts/displaytime.sh 'py37 slow tests' $start_time

buildspec-unittests.yml

+2-2
Original file line numberDiff line numberDiff line change
@@ -18,5 +18,5 @@ phases:
1818
- start_time=`date +%s`
1919
- AWS_ACCESS_KEY_ID= AWS_SECRET_ACCESS_KEY= AWS_SESSION_TOKEN=
2020
AWS_CONTAINER_CREDENTIALS_RELATIVE_URI= AWS_DEFAULT_REGION=
21-
tox -e py36,py37,py38 --parallel all -- tests/unit
22-
- ./ci-scripts/displaytime.sh 'py36,py37,py38 unit' $start_time
21+
tox -e py36,py37 --parallel all -- tests/unit
22+
- ./ci-scripts/displaytime.sh 'py36,py37 unit' $start_time

buildspec.yml

+3-3
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,13 @@ phases:
1111

1212
# run integration tests
1313
- start_time=`date +%s`
14-
- execute-command-if-has-matching-changes "python3.8 -u ci-scripts/queue_build.py" "tests/integ" "tests/scripts" "tests/data" "tests/conftest.py" "tests/__init__.py" "src/*.py" "setup.py" "setup.cfg" "buildspec.yml"
14+
- execute-command-if-has-matching-changes "python3.7 -u ci-scripts/queue_build.py" "tests/integ" "tests/scripts" "tests/data" "tests/conftest.py" "tests/__init__.py" "src/*.py" "setup.py" "setup.cfg" "buildspec.yml"
1515
- ./ci-scripts/displaytime.sh 'build queue' $start_time
1616

1717
- start_time=`date +%s`
1818
- |
19-
execute-command-if-has-matching-changes "env -u AWS_DEFAULT_REGION tox -e py38 -- tests/integ -m \"not local_mode and not cron and not slow_test\" -n 384 --reruns 3 --reruns-delay 15 --durations 50 --boto-config '{\"region_name\": \"us-east-2\"}'" "tests/integ" "tests/scripts" "tests/data" "tests/conftest.py" "tests/__init__.py" "src/*.py" "src/sagemaker/image_uri_config/*.json" "setup.py" "setup.cfg" "buildspec.yml"
20-
- ./ci-scripts/displaytime.sh 'py38 tests/integ' $start_time
19+
execute-command-if-has-matching-changes "env -u AWS_DEFAULT_REGION tox -e py37 -- tests/integ -m \"not local_mode and not cron and not slow_test\" -n 384 --reruns 3 --reruns-delay 15 --durations 50 --boto-config '{\"region_name\": \"us-east-2\"}'" "tests/integ" "tests/scripts" "tests/data" "tests/conftest.py" "tests/__init__.py" "src/*.py" "src/sagemaker/image_uri_config/*.json" "setup.py" "setup.cfg" "buildspec.yml"
20+
- ./ci-scripts/displaytime.sh 'py37 tests/integ' $start_time
2121

2222
post_build:
2323
finally:

doc/workflows/pipelines/sagemaker.workflow.pipelines.rst

-2
Original file line numberDiff line numberDiff line change
@@ -110,8 +110,6 @@ Steps
110110

111111
.. autoclass:: sagemaker.workflow.steps.ProcessingStep
112112

113-
.. autoclass:: sagemaker.workflow.steps.FailStep
114-
115113
Utilities
116114
---------
117115

setup.py

+1
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ def read_version():
6666
"pytest<6.1.0",
6767
"pytest-cov",
6868
"pytest-rerunfailures",
69+
"pytest-timeout",
6970
"pytest-xdist",
7071
"mock",
7172
"contextlib2",

src/sagemaker/image_uris.py

+2
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,8 @@ def retrieve(
9898
"mxnet-1.8.0-gpu-py37": "cu110-ubuntu16.04",
9999
"pytorch-1.6-gpu-py36": "cu110-ubuntu18.04-v3",
100100
"pytorch-1.6.0-gpu-py36": "cu110-ubuntu18.04",
101+
"pytorch-1.6-gpu-py3": "cu110-ubuntu18.04-v3",
102+
"pytorch-1.6.0-gpu-py3": "cu110-ubuntu18.04",
101103
}
102104
key = "-".join([framework, tag])
103105
if key in container_versions:

src/sagemaker/model.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,7 @@ def register(
146146
description (str): Model Package description (default: None).
147147
148148
Returns:
149-
str: A string of SageMaker Model Package ARN.
149+
A `sagemaker.model.ModelPackage` instance.
150150
"""
151151
if self.model_data is None:
152152
raise ValueError("SageMaker Model Package cannot be created without model data.")

src/sagemaker/mxnet/model.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,7 @@ def register(
183183
description (str): Model Package description (default: None).
184184
185185
Returns:
186-
str: A string of SageMaker Model Package ARN.
186+
A `sagemaker.model.ModelPackage` instance.
187187
"""
188188
instance_type = inference_instances[0]
189189
self._init_sagemaker_session_if_does_not_exist(instance_type)

src/sagemaker/pytorch/model.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,7 @@ def register(
182182
description (str): Model Package description (default: None).
183183
184184
Returns:
185-
str: A string of SageMaker Model Package ARN.
185+
A `sagemaker.model.ModelPackage` instance.
186186
"""
187187
instance_type = inference_instances[0]
188188
self._init_sagemaker_session_if_does_not_exist(instance_type)

src/sagemaker/tensorflow/model.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,7 @@ def register(
226226
description (str): Model Package description (default: None).
227227
228228
Returns:
229-
str: A string of SageMaker Model Package ARN.
229+
A `sagemaker.model.ModelPackage` instance.
230230
"""
231231
instance_type = inference_instances[0]
232232
self._init_sagemaker_session_if_does_not_exist(instance_type)

src/sagemaker/workflow/steps.py

+59-44
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,6 @@ class StepTypeEnum(Enum, metaclass=DefaultEnumMeta):
5050

5151
CONDITION = "Condition"
5252
CREATE_MODEL = "Model"
53-
FAIL = "Fail"
5453
PROCESSING = "Processing"
5554
REGISTER_MODEL = "RegisterModel"
5655
TRAINING = "Training"
@@ -93,6 +92,38 @@ def ref(self) -> Dict[str, str]:
9392
return {"Name": self.name}
9493

9594

95+
@attr.s
96+
class CacheConfig:
97+
"""Configuration class to enable caching in pipeline workflow.
98+
99+
If caching is enabled, the pipeline attempts to find a previous execution of a step
100+
that was called with the same arguments. Step caching only considers successful executions.
101+
If a successful previous execution is found, the pipeline propagates the values
102+
from previous execution rather than recomputing the step. When multiple successful executions
103+
exist within the timeout period, it uses the result for the most recent successful execution.
104+
105+
106+
Attributes:
107+
enable_caching (bool): To enable step caching. Defaults to `False`.
108+
expire_after (str): If step caching is enabled, a timeout also needs to be defined.
109+
It defines how old a previous execution can be to be considered for reuse.
110+
Value should be an ISO 8601 duration string. Defaults to `None`.
111+
"""
112+
113+
enable_caching: bool = attr.ib(default=False)
114+
expire_after = attr.ib(
115+
default=None, validator=attr.validators.optional(attr.validators.instance_of(str))
116+
)
117+
118+
@property
119+
def config(self):
120+
"""Configures caching in pipeline steps."""
121+
config = {"Enabled": self.enable_caching}
122+
if self.expire_after is not None:
123+
config["ExpireAfter"] = self.expire_after
124+
return {"CacheConfig": config}
125+
126+
96127
class TrainingStep(Step):
97128
"""Training step for workflow."""
98129

@@ -101,6 +132,7 @@ def __init__(
101132
name: str,
102133
estimator: EstimatorBase,
103134
inputs: TrainingInput = None,
135+
cache_config: CacheConfig = None,
104136
):
105137
"""Construct a TrainingStep, given an `EstimatorBase` instance.
106138
@@ -111,14 +143,15 @@ def __init__(
111143
name (str): The name of the training step.
112144
estimator (EstimatorBase): A `sagemaker.estimator.EstimatorBase` instance.
113145
inputs (TrainingInput): A `sagemaker.inputs.TrainingInput` instance. Defaults to `None`.
146+
cache_config (CacheConfig): A `sagemaker.workflow.steps.CacheConfig` instance.
114147
"""
115148
super(TrainingStep, self).__init__(name, StepTypeEnum.TRAINING)
116149
self.estimator = estimator
117150
self.inputs = inputs
118-
119151
self._properties = Properties(
120152
path=f"Steps.{name}", shape_name="DescribeTrainingJobResponse"
121153
)
154+
self.cache_config = cache_config
122155

123156
@property
124157
def arguments(self) -> RequestType:
@@ -145,6 +178,14 @@ def properties(self):
145178
"""A Properties object representing the DescribeTrainingJobResponse data model."""
146179
return self._properties
147180

181+
def to_request(self) -> RequestType:
182+
"""Updates the dictionary with cache configuration."""
183+
request_dict = super().to_request()
184+
if self.cache_config:
185+
request_dict.update(self.cache_config.config)
186+
187+
return request_dict
188+
148189

149190
class CreateModelStep(Step):
150191
"""CreateModel step for workflow."""
@@ -208,6 +249,7 @@ def __init__(
208249
name: str,
209250
transformer: Transformer,
210251
inputs: TransformInput,
252+
cache_config: CacheConfig = None,
211253
):
212254
"""Constructs a TransformStep, given a `Transformer` instance.
213255
@@ -218,11 +260,12 @@ def __init__(
218260
name (str): The name of the transform step.
219261
transformer (Transformer): A `sagemaker.transformer.Transformer` instance.
220262
inputs (TransformInput): A `sagemaker.inputs.TransformInput` instance.
263+
cache_config (CacheConfig): A `sagemaker.workflow.steps.CacheConfig` instance.
221264
"""
222265
super(TransformStep, self).__init__(name, StepTypeEnum.TRANSFORM)
223266
self.transformer = transformer
224267
self.inputs = inputs
225-
268+
self.cache_config = cache_config
226269
self._properties = Properties(
227270
path=f"Steps.{name}", shape_name="DescribeTransformJobResponse"
228271
)
@@ -258,6 +301,14 @@ def properties(self):
258301
"""A Properties object representing the DescribeTransformJobResponse data model."""
259302
return self._properties
260303

304+
def to_request(self) -> RequestType:
305+
"""Updates the dictionary with cache configuration."""
306+
request_dict = super().to_request()
307+
if self.cache_config:
308+
request_dict.update(self.cache_config.config)
309+
310+
return request_dict
311+
261312

262313
class ProcessingStep(Step):
263314
"""Processing step for workflow."""
@@ -271,6 +322,7 @@ def __init__(
271322
job_arguments: List[str] = None,
272323
code: str = None,
273324
property_files: List[PropertyFile] = None,
325+
cache_config: CacheConfig = None,
274326
):
275327
"""Construct a ProcessingStep, given a `Processor` instance.
276328
@@ -290,6 +342,7 @@ def __init__(
290342
script to run. Defaults to `None`.
291343
property_files (List[PropertyFile]): A list of property files that workflow looks
292344
for and resolves from the configured processing output list.
345+
cache_config (CacheConfig): A `sagemaker.workflow.steps.CacheConfig` instance.
293346
"""
294347
super(ProcessingStep, self).__init__(name, StepTypeEnum.PROCESSING)
295348
self.processor = processor
@@ -306,6 +359,7 @@ def __init__(
306359
self._properties = Properties(
307360
path=f"Steps.{name}", shape_name="DescribeProcessingJobResponse"
308361
)
362+
self.cache_config = cache_config
309363

310364
@property
311365
def arguments(self) -> RequestType:
@@ -336,49 +390,10 @@ def properties(self):
336390
def to_request(self) -> RequestType:
337391
"""Get the request structure for workflow service calls."""
338392
request_dict = super(ProcessingStep, self).to_request()
393+
if self.cache_config:
394+
request_dict.update(self.cache_config.config)
339395
if self.property_files:
340396
request_dict["PropertyFiles"] = [
341397
property_file.expr for property_file in self.property_files
342398
]
343399
return request_dict
344-
345-
346-
class FailStep(Step):
347-
"""Pipeline step to indicate failure."""
348-
349-
def __init__(self, name: str = "Fail"):
350-
"""Construct a FailStep.
351-
352-
Causes the pipeline execution to terminate in a failed state.
353-
354-
Args:
355-
name (str): The name of the step.
356-
"""
357-
super(FailStep, self).__init__(name, StepTypeEnum.FAIL)
358-
root_path = f"Steps.{name}"
359-
root_prop = Properties(path=root_path)
360-
root_prop.__dict__["Fail"] = Properties(f"{root_path}.Fail")
361-
self._properties = root_prop
362-
363-
@property
364-
def arguments(self) -> RequestType:
365-
"""The arguments to the particular step service call."""
366-
return {}
367-
368-
@property
369-
def properties(self):
370-
"""The properties of the particular step."""
371-
return self._properties
372-
373-
def to_request(self) -> RequestType:
374-
"""Get the request structure for workflow service calls."""
375-
return {
376-
"Name": self.name,
377-
"Type": self.step_type.value,
378-
"Arguments": self.arguments,
379-
}
380-
381-
@property
382-
def ref(self) -> Dict[str, str]:
383-
"""Get a reference dict for steps"""
384-
return {"Name": self.name}

tests/data/multimodel/container/Dockerfile

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
FROM 142577830533.dkr.ecr.us-east-2.amazonaws.com/ubuntu:16.04
1+
FROM public.ecr.aws/ubuntu/ubuntu:18.04
22

33
# Set a docker label to advertise multi-model support on the container
44
LABEL com.amazonaws.sagemaker.capabilities.multi-models=true
@@ -15,7 +15,7 @@ RUN apt-get update && \
1515
curl \
1616
vim \
1717
&& rm -rf /var/lib/apt/lists/* \
18-
&& curl -O https://bootstrap.pypa.io/3.5/get-pip.py \
18+
&& curl -O https://bootstrap.pypa.io/get-pip.py \
1919
&& python3 get-pip.py
2020

2121
RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 1

0 commit comments

Comments
 (0)