fix tests with incorrect region-skipping code (#537)

jesterhazy · web-flow · commit 3b7cbd4a04df · 2018-12-09T13:23:27.000-08:00
diff --git a/tests/conftest.py b/tests/conftest.py
@@ -13,21 +13,21 @@
 from __future__ import absolute_import
 
 import json
+import os
 
 import boto3
 import pytest
 from botocore.config import Config
 
 from sagemaker import Session
-from sagemaker.local import LocalSession
 from sagemaker.chainer import Chainer
+from sagemaker.local import LocalSession
 from sagemaker.mxnet import MXNet
-from sagemaker.rl import RLEstimator
 from sagemaker.pytorch.defaults import PYTORCH_VERSION
+from sagemaker.rl import RLEstimator
 from sagemaker.sklearn.defaults import SKLEARN_VERSION
 from sagemaker.tensorflow.defaults import TF_VERSION
 
-
 DEFAULT_REGION = 'us-west-2'
 
 
@@ -38,12 +38,23 @@ def pytest_addoption(parser):
     parser.addoption('--chainer-full-version', action='store', default=Chainer.LATEST_VERSION)
     parser.addoption('--mxnet-full-version', action='store', default=MXNet.LATEST_VERSION)
     parser.addoption('--pytorch-full-version', action='store', default=PYTORCH_VERSION)
-    parser.addoption('--rl-coach-full-version', action='store', default=RLEstimator.COACH_LATEST_VERSION)
-    parser.addoption('--rl-ray-full-version', action='store', default=RLEstimator.RAY_LATEST_VERSION)
+    parser.addoption('--rl-coach-full-version', action='store',
+                     default=RLEstimator.COACH_LATEST_VERSION)
+    parser.addoption('--rl-ray-full-version', action='store',
+                     default=RLEstimator.RAY_LATEST_VERSION)
     parser.addoption('--sklearn-full-version', action='store', default=SKLEARN_VERSION)
     parser.addoption('--tf-full-version', action='store', default=TF_VERSION)
 
 
+def pytest_configure(config):
+    bc = config.getoption('--boto-config')
+    parsed = json.loads(bc) if bc else {}
+    region = parsed.get('region_name', boto3.session.Session().region_name)
+
+    if region:
+        os.environ['TEST_AWS_REGION_NAME'] = region
+
+
 @pytest.fixture(scope='session')
 def sagemaker_client_config(request):
     config = request.config.getoption('--sagemaker-client-config')
@@ -64,10 +75,13 @@ def boto_config(request):
 
 @pytest.fixture(scope='session')
 def sagemaker_session(sagemaker_client_config, sagemaker_runtime_config, boto_config):
-    boto_session = boto3.Session(**boto_config) if boto_config else boto3.Session(region_name=DEFAULT_REGION)
+    boto_session = boto3.Session(**boto_config) if boto_config else boto3.Session(
+        region_name=DEFAULT_REGION)
     sagemaker_client_config.setdefault('config', Config(retries=dict(max_attempts=10)))
-    sagemaker_client = boto_session.client('sagemaker', **sagemaker_client_config) if sagemaker_client_config else None
-    runtime_client = (boto_session.client('sagemaker-runtime', **sagemaker_runtime_config) if sagemaker_runtime_config
+    sagemaker_client = boto_session.client('sagemaker',
+                                           **sagemaker_client_config) if sagemaker_client_config else None
+    runtime_client = (boto_session.client('sagemaker-runtime',
+                                          **sagemaker_runtime_config) if sagemaker_runtime_config
                       else None)
 
     return Session(boto_session=boto_session,
diff --git a/tests/integ/__init__.py b/tests/integ/__init__.py
@@ -23,11 +23,13 @@
 TUNING_DEFAULT_TIMEOUT_MINUTES = 20
 TRANSFORM_DEFAULT_TIMEOUT_MINUTES = 20
 PYTHON_VERSION = 'py' + str(sys.version_info.major)
-REGION = boto3.session.Session().region_name
-
 HOSTING_P2_UNAVAILABLE_REGIONS = ['ca-central-1', 'us-west-1', 'eu-west-2']
 HOSTING_P3_UNAVAILABLE_REGIONS = ['ap-southeast-1', 'ap-southeast-2', 'ap-south-1', 'ca-central-1',
                                   'us-west-1']
 
 logging.getLogger('boto3').setLevel(logging.INFO)
 logging.getLogger('botocore').setLevel(logging.INFO)
+
+
+def test_region():
+    return os.environ.get('TEST_AWS_REGION_NAME', boto3.session.Session().region_name)
diff --git a/tests/integ/test_chainer_train.py b/tests/integ/test_chainer_train.py
@@ -22,7 +22,8 @@
 from sagemaker.chainer.estimator import Chainer
 from sagemaker.chainer.model import ChainerModel
 from sagemaker.utils import sagemaker_timestamp
-from tests.integ import DATA_DIR, PYTHON_VERSION, TRAINING_DEFAULT_TIMEOUT_MINUTES, REGION
+import tests.integ
+from tests.integ import DATA_DIR, PYTHON_VERSION, TRAINING_DEFAULT_TIMEOUT_MINUTES
 from tests.integ.timeout import timeout, timeout_and_delete_endpoint_by_name
 
 
@@ -35,7 +36,7 @@ def test_distributed_cpu_training(sagemaker_session, chainer_full_version):
     _run_mnist_training_job(sagemaker_session, "ml.c4.xlarge", 2, chainer_full_version)
 
 
-@pytest.mark.skipif(REGION in ['us-west-1', 'eu-west-2', 'ca-central-1'],
+@pytest.mark.skipif(tests.integ.test_region() in ['us-west-1', 'eu-west-2', 'ca-central-1'],
                     reason='No ml.p2.xlarge supported in these regions')
 def test_distributed_gpu_training(sagemaker_session, chainer_full_version):
     _run_mnist_training_job(sagemaker_session, "ml.p2.xlarge", 2, chainer_full_version)
diff --git a/tests/integ/test_pytorch_train.py b/tests/integ/test_pytorch_train.py
@@ -11,15 +11,19 @@
 # ANY KIND, either express or implied. See the License for the specific
 # language governing permissions and limitations under the License.
 from __future__ import absolute_import
-import numpy
+
 import os
 import time
+
+import numpy
 import pytest
+import tests.integ
+from tests.integ import DATA_DIR, PYTHON_VERSION, TRAINING_DEFAULT_TIMEOUT_MINUTES
+from tests.integ.timeout import timeout, timeout_and_delete_endpoint_by_name
+
 from sagemaker.pytorch.estimator import PyTorch
 from sagemaker.pytorch.model import PyTorchModel
 from sagemaker.utils import sagemaker_timestamp
-from tests.integ import DATA_DIR, PYTHON_VERSION, TRAINING_DEFAULT_TIMEOUT_MINUTES, REGION
-from tests.integ.timeout import timeout, timeout_and_delete_endpoint_by_name
 
 MNIST_DIR = os.path.join(DATA_DIR, 'pytorch_mnist')
 MNIST_SCRIPT = os.path.join(MNIST_DIR, 'mnist.py')
@@ -57,9 +61,11 @@ def test_deploy_model(pytorch_training_job, sagemaker_session):
     endpoint_name = 'test-pytorch-deploy-model-{}'.format(sagemaker_timestamp())
 
     with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
-        desc = sagemaker_session.sagemaker_client.describe_training_job(TrainingJobName=pytorch_training_job)
+        desc = sagemaker_session.sagemaker_client.describe_training_job(
+            TrainingJobName=pytorch_training_job)
         model_data = desc['ModelArtifacts']['S3ModelArtifacts']
-        model = PyTorchModel(model_data, 'SageMakerRole', entry_point=MNIST_SCRIPT, sagemaker_session=sagemaker_session)
+        model = PyTorchModel(model_data, 'SageMakerRole', entry_point=MNIST_SCRIPT,
+                             sagemaker_session=sagemaker_session)
         predictor = model.deploy(1, 'ml.m4.xlarge', endpoint_name=endpoint_name)
 
         batch_size = 100
@@ -69,7 +75,7 @@ def test_deploy_model(pytorch_training_job, sagemaker_session):
         assert output.shape == (batch_size, 10)
 
 
-@pytest.mark.skipif(REGION in ['us-west-1', 'eu-west-2', 'ca-central-1'],
+@pytest.mark.skipif(tests.integ.test_region() in ['us-west-1', 'eu-west-2', 'ca-central-1'],
                     reason='No ml.p2.xlarge supported in these regions')
 def test_async_fit_deploy(sagemaker_session, pytorch_full_version):
     training_job_name = ""
@@ -90,7 +96,8 @@ def test_async_fit_deploy(sagemaker_session, pytorch_full_version):
 
         with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
             print("Re-attaching now to: %s" % training_job_name)
-            estimator = PyTorch.attach(training_job_name=training_job_name, sagemaker_session=sagemaker_session)
+            estimator = PyTorch.attach(training_job_name=training_job_name,
+                                       sagemaker_session=sagemaker_session)
             predictor = estimator.deploy(1, instance_type, endpoint_name=endpoint_name)
 
             batch_size = 100
@@ -105,7 +112,8 @@ def test_failed_training_job(sagemaker_session, pytorch_full_version):
     script_path = os.path.join(MNIST_DIR, 'failure_script.py')
 
     with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
-        pytorch = _get_pytorch_estimator(sagemaker_session, pytorch_full_version, entry_point=script_path)
+        pytorch = _get_pytorch_estimator(sagemaker_session, pytorch_full_version,
+                                         entry_point=script_path)
 
         with pytest.raises(ValueError) as e:
             pytorch.fit()
@@ -119,8 +127,10 @@ def _upload_training_data(pytorch):
 
 def _get_pytorch_estimator(sagemaker_session, pytorch_full_version, instance_type='ml.c4.xlarge',
                            entry_point=MNIST_SCRIPT):
-    return PyTorch(entry_point=entry_point, role='SageMakerRole', framework_version=pytorch_full_version,
-                   py_version=PYTHON_VERSION, train_instance_count=1, train_instance_type=instance_type,
+    return PyTorch(entry_point=entry_point, role='SageMakerRole',
+                   framework_version=pytorch_full_version,
+                   py_version=PYTHON_VERSION, train_instance_count=1,
+                   train_instance_type=instance_type,
                    sagemaker_session=sagemaker_session)
 
 
diff --git a/tests/integ/test_tf_cifar.py b/tests/integ/test_tf_cifar.py
@@ -18,10 +18,11 @@
 import numpy as np
 import pytest
 
-from sagemaker.tensorflow import TensorFlow
-from tests.integ import DATA_DIR, PYTHON_VERSION, REGION
+import tests.integ
 from tests.integ.timeout import timeout_and_delete_endpoint_by_name, timeout
 
+from sagemaker.tensorflow import TensorFlow
+
 PICKLE_CONTENT_TYPE = 'application/python-pickle'
 
 
@@ -34,22 +35,26 @@ def __call__(self, data):
 
 
 @pytest.mark.continuous_testing
-@pytest.mark.skipif(PYTHON_VERSION != 'py2', reason="TensorFlow image supports only python 2.")
-@pytest.mark.skipif(REGION in ['us-west-1', 'eu-west-2', 'ca-central-1'],
+@pytest.mark.skipif(tests.integ.PYTHON_VERSION != 'py2',
+                    reason="TensorFlow image supports only python 2.")
+@pytest.mark.skipif(tests.integ.test_region() in ['us-west-1', 'eu-west-2', 'ca-central-1'],
                     reason='No ml.p2.xlarge supported in these regions')
 def test_cifar(sagemaker_session, tf_full_version):
     with timeout(minutes=45):
-        script_path = os.path.join(DATA_DIR, 'cifar_10', 'source')
+        script_path = os.path.join(tests.integ.DATA_DIR, 'cifar_10', 'source')
 
-        dataset_path = os.path.join(DATA_DIR, 'cifar_10', 'data')
+        dataset_path = os.path.join(tests.integ.DATA_DIR, 'cifar_10', 'data')
 
-        estimator = TensorFlow(entry_point='resnet_cifar_10.py', source_dir=script_path, role='SageMakerRole',
-                               framework_version=tf_full_version, training_steps=500, evaluation_steps=5,
+        estimator = TensorFlow(entry_point='resnet_cifar_10.py', source_dir=script_path,
+                               role='SageMakerRole',
+                               framework_version=tf_full_version, training_steps=500,
+                               evaluation_steps=5,
                                train_instance_count=2, train_instance_type='ml.p2.xlarge',
                                sagemaker_session=sagemaker_session, train_max_run=45 * 60,
                                base_job_name='test-cifar')
 
-        inputs = estimator.sagemaker_session.upload_data(path=dataset_path, key_prefix='data/cifar10')
+        inputs = estimator.sagemaker_session.upload_data(path=dataset_path,
+                                                         key_prefix='data/cifar10')
         estimator.fit(inputs, logs=False)
         print('job succeeded: {}'.format(estimator.latest_training_job.name))
 
diff --git a/tests/integ/test_tf_keras.py b/tests/integ/test_tf_keras.py
@@ -17,18 +17,20 @@
 import numpy as np
 import pytest
 
-from sagemaker.tensorflow import TensorFlow
-from tests.integ import DATA_DIR, PYTHON_VERSION, REGION
+import tests.integ
 from tests.integ.timeout import timeout_and_delete_endpoint_by_name, timeout
 
+from sagemaker.tensorflow import TensorFlow
+
 
 @pytest.mark.continuous_testing
-@pytest.mark.skipif(PYTHON_VERSION != 'py2', reason="TensorFlow image supports only python 2.")
-@pytest.mark.skipif(REGION in ['us-west-1', 'eu-west-2', 'ca-central-1'],
+@pytest.mark.skipif(tests.integ.PYTHON_VERSION != 'py2',
+                    reason="TensorFlow image supports only python 2.")
+@pytest.mark.skipif(tests.integ.test_region() in ['us-west-1', 'eu-west-2', 'ca-central-1'],
                     reason='No ml.p2.xlarge supported in these regions')
 def test_keras(sagemaker_session, tf_full_version):
-    script_path = os.path.join(DATA_DIR, 'cifar_10', 'source')
-    dataset_path = os.path.join(DATA_DIR, 'cifar_10', 'data')
+    script_path = os.path.join(tests.integ.DATA_DIR, 'cifar_10', 'source')
+    dataset_path = os.path.join(tests.integ.DATA_DIR, 'cifar_10', 'data')
 
     with timeout(minutes=45):
         estimator = TensorFlow(entry_point='keras_cnn_cifar_10.py',
@@ -39,7 +41,8 @@ def test_keras(sagemaker_session, tf_full_version):
                                train_instance_count=1, train_instance_type='ml.c4.xlarge',
                                train_max_run=45 * 60)
 
-        inputs = estimator.sagemaker_session.upload_data(path=dataset_path, key_prefix='data/cifar10')
+        inputs = estimator.sagemaker_session.upload_data(path=dataset_path,
+                                                         key_prefix='data/cifar10')
 
         estimator.fit(inputs)
 
diff --git a/tests/integ/test_tfs.py b/tests/integ/test_tfs.py
@@ -22,14 +22,19 @@
 from sagemaker.tensorflow.serving import Model, Predictor
 
 
-@pytest.fixture(scope='session', params=['ml.c5.xlarge', 'ml.p3.2xlarge'])
+@pytest.fixture(scope='session', params=[
+    'ml.c5.xlarge',
+    pytest.param('ml.p3.2xlarge',
+                 marks=pytest.mark.skipif(
+                     tests.integ.test_region() in tests.integ.HOSTING_P3_UNAVAILABLE_REGIONS,
+                     reason='no ml.p3 instances in this region'))])
 def instance_type(request):
     return request.param
 
 
 @pytest.fixture(scope='module')
 def tfs_predictor(instance_type, sagemaker_session, tf_full_version):
-    endpoint_name = sagemaker.utils.name_from_base('sagemaker-tensorflow-serving')
+    endpoint_name = sagemaker.utils.unique_name_from_base('sagemaker-tensorflow-serving')
     model_data = sagemaker_session.upload_data(
         path='tests/data/tensorflow-serving-test-model.tar.gz',
         key_prefix='tensorflow-serving/models')
@@ -42,11 +47,7 @@ def tfs_predictor(instance_type, sagemaker_session, tf_full_version):
 
 
 @pytest.mark.continuous_testing
-def test_predict(tfs_predictor, instance_type):
-    if ('p3' in instance_type) and (
-            tests.integ.REGION in tests.integ.HOSTING_P3_UNAVAILABLE_REGIONS):
-        pytest.skip('no ml.p3 instances in this region')
-
+def test_predict(tfs_predictor, instance_type):  # pylint: disable=W0613
     input_data = {'instances': [1.0, 2.0, 5.0]}
     expected_result = {'predictions': [3.5, 4.0, 5.5]}