Skip to content

Commit 560042f

Browse files
authored
upgrade to latest sagemaker-experiments (#163)
1 parent 01cfdc7 commit 560042f

File tree

4 files changed

+97
-4
lines changed

4 files changed

+97
-4
lines changed

docker/1.4.0/py3/Dockerfile.cpu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ RUN pip install --upgrade pip --trusted-host pypi.org --trusted-host files.pytho
9696
scipy==1.2.2 \
9797
smdebug==0.5.0.post0 \
9898
sagemaker==1.50.17 \
99-
sagemaker-experiments==0.1.3 \
99+
sagemaker-experiments==0.1.7 \
100100
/sagemaker_pytorch_training.tar.gz \
101101
&& pip install --no-cache-dir -U https://pytorch-aws.s3.amazonaws.com/pytorch-1.4.0/py3/cpu/torch-1.4.0-cp36-cp36m-manylinux1_x86_64.whl \
102102
&& pip uninstall -y torchvision \

docker/1.4.0/py3/Dockerfile.gpu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ RUN /opt/conda/bin/conda config --set ssl_verify False \
120120
RUN pip install \
121121
--no-cache-dir smdebug==0.5.0.post0 \
122122
sagemaker==1.50.17 \
123-
sagemaker-experiments==0.1.3 \
123+
sagemaker-experiments==0.1.7 \
124124
--no-cache-dir fastai==1.0.59 \
125125
awscli \
126126
scipy==1.2.2 \

setup.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,9 @@ def read(fname):
5050

5151
install_requires=['retrying', 'sagemaker-containers>=2.6.2', 'six>=1.12.0'],
5252
extras_require={
53-
'test': ['boto3==1.9.169', 'coverage==4.5.3', 'docker-compose==1.23.2', 'flake8==3.7.7', 'Flask==1.1.1',
53+
'test': ['boto3==1.10.32', 'coverage==4.5.3', 'docker-compose==1.23.2', 'flake8==3.7.7', 'Flask==1.1.1',
5454
'mock==2.0.0', 'pytest==4.4.0', 'pytest-cov==2.7.1', 'pytest-xdist==1.28.0', 'PyYAML==3.10',
55-
'sagemaker==1.28.1', 'torch==1.4.0', 'torchvision==0.5.0', 'tox==3.7.0', 'requests_mock==1.6.0']
55+
'sagemaker==1.28.1', 'torch==1.4.0', 'torchvision==0.5.0', 'tox==3.7.0', 'requests_mock==1.6.0',
56+
'sagemaker-experiments==0.1.7']
5657
},
5758
)
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
# Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License").
4+
# You may not use this file except in compliance with the License.
5+
# A copy of the License is located at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# or in the "license" file accompanying this file. This file is distributed
10+
# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
11+
# express or implied. See the License for the specific language governing
12+
# permissions and limitations under the License.
13+
from __future__ import absolute_import
14+
15+
import time
16+
17+
import pytest
18+
from sagemaker.pytorch import PyTorch
19+
from sagemaker import utils
20+
from smexperiments.experiment import Experiment
21+
from smexperiments.trial import Trial
22+
from smexperiments.trial_component import TrialComponent
23+
from test.integration import training_dir, smdebug_mnist_script, DEFAULT_TIMEOUT
24+
from test.integration.sagemaker.timeout import timeout
25+
26+
27+
@pytest.mark.skip_py2_containers
28+
def test_training(sagemaker_session, ecr_image, instance_type):
29+
30+
sm_client = sagemaker_session.sagemaker_client
31+
32+
experiment_name = "pytorch-container-integ-test-{}".format(int(time.time()))
33+
34+
experiment = Experiment.create(
35+
experiment_name=experiment_name,
36+
description="Integration test full customer e2e from sagemaker-pytorch-container",
37+
sagemaker_boto_client=sm_client,
38+
)
39+
40+
trial_name = "pytorch-container-integ-test-{}".format(int(time.time()))
41+
trial = Trial.create(
42+
experiment_name=experiment_name, trial_name=trial_name, sagemaker_boto_client=sm_client
43+
)
44+
45+
hyperparameters = {
46+
"random_seed": True,
47+
"num_steps": 50,
48+
"smdebug_path": "/opt/ml/output/tensors",
49+
"epochs": 1,
50+
"data_dir": training_dir,
51+
}
52+
53+
training_job_name = utils.unique_name_from_base("test-pytorch-experiments-image")
54+
55+
# create a training job and wait for it to complete
56+
with timeout(minutes=DEFAULT_TIMEOUT):
57+
pytorch = PyTorch(
58+
entry_point=smdebug_mnist_script,
59+
role="SageMakerRole",
60+
train_instance_count=1,
61+
train_instance_type=instance_type,
62+
sagemaker_session=sagemaker_session,
63+
image_name=ecr_image,
64+
hyperparameters=hyperparameters,
65+
)
66+
training_input = pytorch.sagemaker_session.upload_data(
67+
path=training_dir, key_prefix="pytorch/mnist"
68+
)
69+
pytorch.fit({"training": training_input}, job_name=training_job_name)
70+
71+
training_job = sm_client.describe_training_job(TrainingJobName=training_job_name)
72+
training_job_arn = training_job["TrainingJobArn"]
73+
74+
# verify trial component auto created from the training job
75+
trial_components = list(
76+
TrialComponent.list(source_arn=training_job_arn, sagemaker_boto_client=sm_client)
77+
)
78+
79+
trial_component_summary = trial_components[0]
80+
trial_component = TrialComponent.load(
81+
trial_component_name=trial_component_summary.trial_component_name,
82+
sagemaker_boto_client=sm_client,
83+
)
84+
85+
# associate the trial component with the trial
86+
trial.add_trial_component(trial_component)
87+
88+
# cleanup
89+
trial.remove_trial_component(trial_component_summary.trial_component_name)
90+
trial_component.delete()
91+
trial.delete()
92+
experiment.delete()

0 commit comments

Comments
 (0)