
fix tests with incorrect region-skipping code #537


Merged
merged 6 commits into from Dec 9, 2018
30 changes: 22 additions & 8 deletions tests/conftest.py
@@ -13,21 +13,21 @@
from __future__ import absolute_import

import json
import os

import boto3
import pytest
from botocore.config import Config

from sagemaker import Session
from sagemaker.local import LocalSession
from sagemaker.chainer import Chainer
from sagemaker.local import LocalSession
from sagemaker.mxnet import MXNet
from sagemaker.rl import RLEstimator
from sagemaker.pytorch.defaults import PYTORCH_VERSION
from sagemaker.rl import RLEstimator
from sagemaker.sklearn.defaults import SKLEARN_VERSION
from sagemaker.tensorflow.defaults import TF_VERSION


DEFAULT_REGION = 'us-west-2'


@@ -38,12 +38,23 @@ def pytest_addoption(parser):
parser.addoption('--chainer-full-version', action='store', default=Chainer.LATEST_VERSION)
parser.addoption('--mxnet-full-version', action='store', default=MXNet.LATEST_VERSION)
parser.addoption('--pytorch-full-version', action='store', default=PYTORCH_VERSION)
parser.addoption('--rl-coach-full-version', action='store', default=RLEstimator.COACH_LATEST_VERSION)
parser.addoption('--rl-ray-full-version', action='store', default=RLEstimator.RAY_LATEST_VERSION)
parser.addoption('--rl-coach-full-version', action='store',
default=RLEstimator.COACH_LATEST_VERSION)
parser.addoption('--rl-ray-full-version', action='store',
default=RLEstimator.RAY_LATEST_VERSION)
parser.addoption('--sklearn-full-version', action='store', default=SKLEARN_VERSION)
parser.addoption('--tf-full-version', action='store', default=TF_VERSION)


def pytest_configure(config):
Contributor

can this go in conftest.py or __init__.py instead of individual test files or is there a reason it's specific to this file?

Contributor Author

yes, it must be in this file for pytest to call it (and set the env var) at the correct time. stuff in __init__.py doesn't have access to the pytest config (part of the original problem).
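
For illustration, a condensed sketch of the ordering this relies on (the two pieces live in tests/conftest.py and tests/integ/__init__.py in this PR): pytest imports conftest.py and calls the pytest_configure hook before it collects and imports the test modules, so the environment variable is already exported by the time module-level skipif markers call test_region().

```python
# tests/conftest.py (sketch) -- pytest calls this hook before test collection,
# so the env var is set before any test module is imported. Assumes the
# --boto-config option registered by pytest_addoption above.
import json
import os

import boto3


def pytest_configure(config):
    bc = config.getoption('--boto-config')
    parsed = json.loads(bc) if bc else {}
    region = parsed.get('region_name', boto3.session.Session().region_name)
    if region:
        os.environ['TEST_AWS_REGION_NAME'] = region


# tests/integ/__init__.py (sketch) -- read the region lazily at call time
# instead of caching boto3's default region at import time.
def test_region():
    return os.environ.get('TEST_AWS_REGION_NAME', boto3.session.Session().region_name)
```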

bc = config.getoption('--boto-config')
parsed = json.loads(bc) if bc else {}
region = parsed.get('region_name', boto3.session.Session().region_name)

if region:
os.environ['TEST_AWS_REGION_NAME'] = region


@pytest.fixture(scope='session')
def sagemaker_client_config(request):
config = request.config.getoption('--sagemaker-client-config')
@@ -64,10 +75,13 @@ def boto_config(request):

@pytest.fixture(scope='session')
def sagemaker_session(sagemaker_client_config, sagemaker_runtime_config, boto_config):
boto_session = boto3.Session(**boto_config) if boto_config else boto3.Session(region_name=DEFAULT_REGION)
boto_session = boto3.Session(**boto_config) if boto_config else boto3.Session(
region_name=DEFAULT_REGION)
Contributor

not in scope of this PR, but seeing this and scanning through conftest.py makes me think we should make a boto_session fixture

Contributor Author

agree, there should be another PR for this.
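
For reference, a minimal sketch of what such a fixture could look like (hypothetical, not part of this PR; it reuses the existing boto_config fixture and the DEFAULT_REGION constant from conftest.py):

```python
# Hypothetical session-scoped boto_session fixture -- a sketch only.
import boto3
import pytest

DEFAULT_REGION = 'us-west-2'  # same default conftest.py already uses


@pytest.fixture(scope='session')
def boto_session(boto_config):
    # Build one shared boto3 Session from the parsed --boto-config option,
    # falling back to the default region when no config is supplied.
    if boto_config:
        return boto3.Session(**boto_config)
    return boto3.Session(region_name=DEFAULT_REGION)
```

The sagemaker_session fixture (and any other fixture that needs a boto3 Session) could then depend on boto_session instead of constructing its own.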

sagemaker_client_config.setdefault('config', Config(retries=dict(max_attempts=10)))
sagemaker_client = boto_session.client('sagemaker', **sagemaker_client_config) if sagemaker_client_config else None
runtime_client = (boto_session.client('sagemaker-runtime', **sagemaker_runtime_config) if sagemaker_runtime_config
sagemaker_client = boto_session.client('sagemaker',
**sagemaker_client_config) if sagemaker_client_config else None
runtime_client = (boto_session.client('sagemaker-runtime',
**sagemaker_runtime_config) if sagemaker_runtime_config
else None)

return Session(boto_session=boto_session,
6 changes: 4 additions & 2 deletions tests/integ/__init__.py
@@ -23,11 +23,13 @@
TUNING_DEFAULT_TIMEOUT_MINUTES = 20
TRANSFORM_DEFAULT_TIMEOUT_MINUTES = 20
PYTHON_VERSION = 'py' + str(sys.version_info.major)
REGION = boto3.session.Session().region_name

HOSTING_P2_UNAVAILABLE_REGIONS = ['ca-central-1', 'us-west-1', 'eu-west-2']
HOSTING_P3_UNAVAILABLE_REGIONS = ['ap-southeast-1', 'ap-southeast-2', 'ap-south-1', 'ca-central-1',
'us-west-1']

logging.getLogger('boto3').setLevel(logging.INFO)
logging.getLogger('botocore').setLevel(logging.INFO)


def test_region():
return os.environ.get('TEST_AWS_REGION_NAME', boto3.session.Session().region_name)
5 changes: 3 additions & 2 deletions tests/integ/test_chainer_train.py
@@ -22,7 +22,8 @@
from sagemaker.chainer.estimator import Chainer
from sagemaker.chainer.model import ChainerModel
from sagemaker.utils import sagemaker_timestamp
from tests.integ import DATA_DIR, PYTHON_VERSION, TRAINING_DEFAULT_TIMEOUT_MINUTES, REGION
import tests.integ
from tests.integ import DATA_DIR, PYTHON_VERSION, TRAINING_DEFAULT_TIMEOUT_MINUTES
from tests.integ.timeout import timeout, timeout_and_delete_endpoint_by_name


@@ -35,7 +36,7 @@ def test_distributed_cpu_training(sagemaker_session, chainer_full_version):
_run_mnist_training_job(sagemaker_session, "ml.c4.xlarge", 2, chainer_full_version)


@pytest.mark.skipif(REGION in ['us-west-1', 'eu-west-2', 'ca-central-1'],
@pytest.mark.skipif(tests.integ.test_region() in ['us-west-1', 'eu-west-2', 'ca-central-1'],
reason='No ml.p2.xlarge supported in these regions')
def test_distributed_gpu_training(sagemaker_session, chainer_full_version):
_run_mnist_training_job(sagemaker_session, "ml.p2.xlarge", 2, chainer_full_version)
30 changes: 20 additions & 10 deletions tests/integ/test_pytorch_train.py
@@ -11,15 +11,19 @@
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
from __future__ import absolute_import
import numpy

import os
import time

import numpy
import pytest
import tests.integ
from tests.integ import DATA_DIR, PYTHON_VERSION, TRAINING_DEFAULT_TIMEOUT_MINUTES
from tests.integ.timeout import timeout, timeout_and_delete_endpoint_by_name

from sagemaker.pytorch.estimator import PyTorch
from sagemaker.pytorch.model import PyTorchModel
from sagemaker.utils import sagemaker_timestamp
from tests.integ import DATA_DIR, PYTHON_VERSION, TRAINING_DEFAULT_TIMEOUT_MINUTES, REGION
from tests.integ.timeout import timeout, timeout_and_delete_endpoint_by_name

MNIST_DIR = os.path.join(DATA_DIR, 'pytorch_mnist')
MNIST_SCRIPT = os.path.join(MNIST_DIR, 'mnist.py')
@@ -57,9 +61,11 @@ def test_deploy_model(pytorch_training_job, sagemaker_session):
endpoint_name = 'test-pytorch-deploy-model-{}'.format(sagemaker_timestamp())

with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
desc = sagemaker_session.sagemaker_client.describe_training_job(TrainingJobName=pytorch_training_job)
desc = sagemaker_session.sagemaker_client.describe_training_job(
TrainingJobName=pytorch_training_job)
model_data = desc['ModelArtifacts']['S3ModelArtifacts']
model = PyTorchModel(model_data, 'SageMakerRole', entry_point=MNIST_SCRIPT, sagemaker_session=sagemaker_session)
model = PyTorchModel(model_data, 'SageMakerRole', entry_point=MNIST_SCRIPT,
sagemaker_session=sagemaker_session)
predictor = model.deploy(1, 'ml.m4.xlarge', endpoint_name=endpoint_name)

batch_size = 100
@@ -69,7 +75,7 @@ def test_deploy_model(pytorch_training_job, sagemaker_session):
assert output.shape == (batch_size, 10)


@pytest.mark.skipif(REGION in ['us-west-1', 'eu-west-2', 'ca-central-1'],
@pytest.mark.skipif(tests.integ.test_region() in ['us-west-1', 'eu-west-2', 'ca-central-1'],
reason='No ml.p2.xlarge supported in these regions')
def test_async_fit_deploy(sagemaker_session, pytorch_full_version):
training_job_name = ""
@@ -90,7 +96,8 @@ def test_async_fit_deploy(sagemaker_session, pytorch_full_version):

with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
print("Re-attaching now to: %s" % training_job_name)
estimator = PyTorch.attach(training_job_name=training_job_name, sagemaker_session=sagemaker_session)
estimator = PyTorch.attach(training_job_name=training_job_name,
sagemaker_session=sagemaker_session)
predictor = estimator.deploy(1, instance_type, endpoint_name=endpoint_name)

batch_size = 100
@@ -105,7 +112,8 @@ def test_failed_training_job(sagemaker_session, pytorch_full_version):
script_path = os.path.join(MNIST_DIR, 'failure_script.py')

with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
pytorch = _get_pytorch_estimator(sagemaker_session, pytorch_full_version, entry_point=script_path)
pytorch = _get_pytorch_estimator(sagemaker_session, pytorch_full_version,
entry_point=script_path)

with pytest.raises(ValueError) as e:
pytorch.fit()
@@ -119,8 +127,10 @@ def _upload_training_data(pytorch):

def _get_pytorch_estimator(sagemaker_session, pytorch_full_version, instance_type='ml.c4.xlarge',
entry_point=MNIST_SCRIPT):
return PyTorch(entry_point=entry_point, role='SageMakerRole', framework_version=pytorch_full_version,
py_version=PYTHON_VERSION, train_instance_count=1, train_instance_type=instance_type,
return PyTorch(entry_point=entry_point, role='SageMakerRole',
framework_version=pytorch_full_version,
py_version=PYTHON_VERSION, train_instance_count=1,
train_instance_type=instance_type,
sagemaker_session=sagemaker_session)


23 changes: 14 additions & 9 deletions tests/integ/test_tf_cifar.py
@@ -18,10 +18,11 @@
import numpy as np
import pytest

from sagemaker.tensorflow import TensorFlow
from tests.integ import DATA_DIR, PYTHON_VERSION, REGION
import tests.integ
from tests.integ.timeout import timeout_and_delete_endpoint_by_name, timeout

from sagemaker.tensorflow import TensorFlow

PICKLE_CONTENT_TYPE = 'application/python-pickle'


@@ -34,22 +35,26 @@ def __call__(self, data):


@pytest.mark.continuous_testing
@pytest.mark.skipif(PYTHON_VERSION != 'py2', reason="TensorFlow image supports only python 2.")
@pytest.mark.skipif(REGION in ['us-west-1', 'eu-west-2', 'ca-central-1'],
@pytest.mark.skipif(tests.integ.PYTHON_VERSION != 'py2',
reason="TensorFlow image supports only python 2.")
@pytest.mark.skipif(tests.integ.test_region() in ['us-west-1', 'eu-west-2', 'ca-central-1'],
reason='No ml.p2.xlarge supported in these regions')
def test_cifar(sagemaker_session, tf_full_version):
with timeout(minutes=45):
script_path = os.path.join(DATA_DIR, 'cifar_10', 'source')
script_path = os.path.join(tests.integ.DATA_DIR, 'cifar_10', 'source')

dataset_path = os.path.join(DATA_DIR, 'cifar_10', 'data')
dataset_path = os.path.join(tests.integ.DATA_DIR, 'cifar_10', 'data')

estimator = TensorFlow(entry_point='resnet_cifar_10.py', source_dir=script_path, role='SageMakerRole',
framework_version=tf_full_version, training_steps=500, evaluation_steps=5,
estimator = TensorFlow(entry_point='resnet_cifar_10.py', source_dir=script_path,
role='SageMakerRole',
framework_version=tf_full_version, training_steps=500,
evaluation_steps=5,
train_instance_count=2, train_instance_type='ml.p2.xlarge',
sagemaker_session=sagemaker_session, train_max_run=45 * 60,
base_job_name='test-cifar')

inputs = estimator.sagemaker_session.upload_data(path=dataset_path, key_prefix='data/cifar10')
inputs = estimator.sagemaker_session.upload_data(path=dataset_path,
key_prefix='data/cifar10')
estimator.fit(inputs, logs=False)
print('job succeeded: {}'.format(estimator.latest_training_job.name))

17 changes: 10 additions & 7 deletions tests/integ/test_tf_keras.py
@@ -17,18 +17,20 @@
import numpy as np
import pytest

from sagemaker.tensorflow import TensorFlow
from tests.integ import DATA_DIR, PYTHON_VERSION, REGION
import tests.integ
from tests.integ.timeout import timeout_and_delete_endpoint_by_name, timeout

from sagemaker.tensorflow import TensorFlow


@pytest.mark.continuous_testing
@pytest.mark.skipif(PYTHON_VERSION != 'py2', reason="TensorFlow image supports only python 2.")
@pytest.mark.skipif(REGION in ['us-west-1', 'eu-west-2', 'ca-central-1'],
@pytest.mark.skipif(tests.integ.PYTHON_VERSION != 'py2',
reason="TensorFlow image supports only python 2.")
@pytest.mark.skipif(tests.integ.test_region() in ['us-west-1', 'eu-west-2', 'ca-central-1'],
reason='No ml.p2.xlarge supported in these regions')
def test_keras(sagemaker_session, tf_full_version):
script_path = os.path.join(DATA_DIR, 'cifar_10', 'source')
dataset_path = os.path.join(DATA_DIR, 'cifar_10', 'data')
script_path = os.path.join(tests.integ.DATA_DIR, 'cifar_10', 'source')
dataset_path = os.path.join(tests.integ.DATA_DIR, 'cifar_10', 'data')

with timeout(minutes=45):
estimator = TensorFlow(entry_point='keras_cnn_cifar_10.py',
@@ -39,7 +41,8 @@ def test_keras(sagemaker_session, tf_full_version):
train_instance_count=1, train_instance_type='ml.c4.xlarge',
train_max_run=45 * 60)

inputs = estimator.sagemaker_session.upload_data(path=dataset_path, key_prefix='data/cifar10')
inputs = estimator.sagemaker_session.upload_data(path=dataset_path,
key_prefix='data/cifar10')

estimator.fit(inputs)

15 changes: 8 additions & 7 deletions tests/integ/test_tfs.py
@@ -22,14 +22,19 @@
from sagemaker.tensorflow.serving import Model, Predictor


@pytest.fixture(scope='session', params=['ml.c5.xlarge', 'ml.p3.2xlarge'])
@pytest.fixture(scope='session', params=[
'ml.c5.xlarge',
pytest.param('ml.p3.2xlarge',
marks=pytest.mark.skipif(
tests.integ.test_region() in tests.integ.HOSTING_P3_UNAVAILABLE_REGIONS,
reason='no ml.p3 instances in this region'))])
def instance_type(request):
return request.param


@pytest.fixture(scope='module')
def tfs_predictor(instance_type, sagemaker_session, tf_full_version):
endpoint_name = sagemaker.utils.name_from_base('sagemaker-tensorflow-serving')
endpoint_name = sagemaker.utils.unique_name_from_base('sagemaker-tensorflow-serving')
model_data = sagemaker_session.upload_data(
path='tests/data/tensorflow-serving-test-model.tar.gz',
key_prefix='tensorflow-serving/models')
@@ -42,11 +47,7 @@ def tfs_predictor(instance_type, sagemaker_session, tf_full_version):


@pytest.mark.continuous_testing
def test_predict(tfs_predictor, instance_type):
if ('p3' in instance_type) and (
tests.integ.REGION in tests.integ.HOSTING_P3_UNAVAILABLE_REGIONS):
pytest.skip('no ml.p3 instances in this region')

def test_predict(tfs_predictor, instance_type): # pylint: disable=W0613
input_data = {'instances': [1.0, 2.0, 5.0]}
expected_result = {'predictions': [3.5, 4.0, 5.5]}
