change: add PyTorch configuration for image_uris.retrieve()

laurenyu · laurenyu · commit e89ecc62d142 · 2020-07-16T16:58:55.000-07:00
diff --git a/src/sagemaker/image_uri_config/pytorch.json b/src/sagemaker/image_uri_config/pytorch.json
diff --git a/src/sagemaker/image_uris.py b/src/sagemaker/image_uris.py
@@ -103,8 +103,9 @@ def config_for_framework(framework):
 def _validate_version_and_set_if_needed(version, config, framework):
     """Checks if the framework/algorithm version is one of the supported versions."""
     available_versions = list(config["versions"].keys())
+    aliased_versions = list(config.get("version_aliases", {}).keys())  # [] when no aliases
 
-    if len(available_versions) == 1:
+    if len(available_versions) == 1 and version not in aliased_versions:  # aliases are kept as-is
         log_message = "Defaulting to the only supported framework/algorithm version: {}.".format(
             available_versions[0]
         )
@@ -115,8 +116,7 @@ def _validate_version_and_set_if_needed(version, config, framework):
 
         return available_versions[0]
 
-    available_versions += list(config.get("version_aliases", {}).keys())
-    _validate_arg("{} version".format(framework), version, available_versions)
+    _validate_arg("{} version".format(framework), version, available_versions + aliased_versions)
 
     return version
 
diff --git a/tests/conftest.py b/tests/conftest.py
@@ -120,14 +120,25 @@ def mxnet_py_version(request):
     return request.param
 
 
-@pytest.fixture(scope="module", params=["0.4", "0.4.0", "1.0", "1.0.0"])
-def pytorch_version(request):
-    return request.param
+@pytest.fixture(scope="module", params=["py2", "py3"])
+def pytorch_training_py_version(pytorch_training_version, request):
+    if Version(pytorch_training_version) < Version("1.5.0"):  # only < 1.5.0 varies by param
+        return request.param
+    else:
+        return "py3"  # 1.5.0 and later yield py3 for both params
 
 
 @pytest.fixture(scope="module", params=["py2", "py3"])
-def pytorch_py_version(request):
-    return request.param
+def pytorch_inference_py_version(pytorch_inference_version, request):
+    if Version(pytorch_inference_version) < Version("1.4.0"):  # only < 1.4.0 varies by param
+        return request.param
+    else:
+        return "py3"  # 1.4.0 and later yield py3 for both params
+
+
+@pytest.fixture(scope="module")
+def pytorch_eia_py_version():  # not parametrized: always yields py3
+    return "py3"
 
 
 @pytest.fixture(scope="module", params=["0.20.0"])
@@ -176,21 +187,6 @@ def rl_ray_version(request):
     return request.param
 
 
-@pytest.fixture(scope="module")
-def pytorch_full_version():
-    return "1.5.0"
-
-
-@pytest.fixture(scope="module")
-def pytorch_full_py_version():
-    return "py3"
-
-
-@pytest.fixture(scope="module")
-def pytorch_full_ei_version():
-    return "1.3.1"
-
-
 @pytest.fixture(scope="module")
 def rl_coach_mxnet_full_version():
     return RLEstimator.COACH_LATEST_VERSION_MXNET
@@ -314,7 +310,7 @@ def pytest_generate_tests(metafunc):
 
 
 def _generate_all_framework_version_fixtures(metafunc):
-    for fw in ("chainer", "mxnet", "tensorflow", "xgboost"):
+    for fw in ("chainer", "mxnet", "pytorch", "tensorflow", "xgboost"):
         config = image_uris.config_for_framework(fw)
         if "scope" in config:
             _parametrize_framework_version_fixtures(metafunc, fw, config)
diff --git a/tests/integ/test_airflow_config.py b/tests/integ/test_airflow_config.py
@@ -578,14 +578,17 @@ def test_xgboost_airflow_config_uploads_data_source_to_s3(
 
 @pytest.mark.canary_quick
 def test_pytorch_airflow_config_uploads_data_source_to_s3_when_inputs_not_provided(
-    sagemaker_session, cpu_instance_type, pytorch_full_version, pytorch_full_py_version
+    sagemaker_session,
+    cpu_instance_type,
+    pytorch_training_latest_version,
+    pytorch_training_latest_py_version,
 ):
     with timeout(seconds=AIRFLOW_CONFIG_TIMEOUT_IN_SECONDS):
         estimator = PyTorch(
             entry_point=PYTORCH_MNIST_SCRIPT,
             role=ROLE,
-            framework_version=pytorch_full_version,
-            py_version=pytorch_full_py_version,
+            framework_version=pytorch_training_latest_version,
+            py_version=pytorch_training_latest_py_version,
             instance_count=2,
             instance_type=cpu_instance_type,
             hyperparameters={"epochs": 6, "backend": "gloo"},
diff --git a/tests/integ/test_git.py b/tests/integ/test_git.py
@@ -50,16 +50,18 @@
 
 
 @pytest.mark.local_mode
-def test_github(sagemaker_local_session, pytorch_full_version, pytorch_full_py_version):
+def test_github(
+    sagemaker_local_session, pytorch_training_latest_version, pytorch_training_latest_py_version
+):
     script_path = "mnist.py"
     git_config = {"repo": GIT_REPO, "branch": BRANCH, "commit": COMMIT}
 
     pytorch = PyTorch(
         entry_point=script_path,
         role="SageMakerRole",
         source_dir="pytorch",
-        framework_version=pytorch_full_version,
-        py_version=pytorch_full_py_version,
+        framework_version=pytorch_training_latest_version,
+        py_version=pytorch_training_latest_py_version,
         instance_count=1,
         instance_type="local",
         sagemaker_session=sagemaker_local_session,
diff --git a/tests/integ/test_pytorch.py b/tests/integ/test_pytorch.py
@@ -38,11 +38,17 @@
 
 @pytest.fixture(scope="module", name="pytorch_training_job")
 def fixture_training_job(
-    sagemaker_session, pytorch_full_version, pytorch_full_py_version, cpu_instance_type
+    sagemaker_session,
+    pytorch_training_latest_version,
+    pytorch_training_latest_py_version,
+    cpu_instance_type,
 ):
     with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
         pytorch = _get_pytorch_estimator(
-            sagemaker_session, pytorch_full_version, pytorch_full_py_version, cpu_instance_type
+            sagemaker_session,
+            pytorch_training_latest_version,
+            pytorch_training_latest_py_version,
+            cpu_instance_type,
         )
 
         pytorch.fit({"training": _upload_training_data(pytorch)})
@@ -66,12 +72,14 @@ def test_fit_deploy(pytorch_training_job, sagemaker_session, cpu_instance_type):
 
 
 @pytest.mark.local_mode
-def test_local_fit_deploy(sagemaker_local_session, pytorch_full_version, pytorch_full_py_version):
+def test_local_fit_deploy(
+    sagemaker_local_session, pytorch_training_latest_version, pytorch_training_latest_py_version
+):
     pytorch = PyTorch(
         entry_point=MNIST_SCRIPT,
         role="SageMakerRole",
-        framework_version=pytorch_full_version,
-        py_version=pytorch_full_py_version,
+        framework_version=pytorch_training_latest_version,
+        py_version=pytorch_training_latest_py_version,
         instance_count=1,
         instance_type="local",
         sagemaker_session=sagemaker_local_session,
@@ -94,8 +102,8 @@ def test_deploy_model(
     pytorch_training_job,
     sagemaker_session,
     cpu_instance_type,
-    pytorch_full_version,
-    pytorch_full_py_version,
+    pytorch_inference_latest_version,
+    pytorch_inference_latest_py_version,
 ):
     endpoint_name = "test-pytorch-deploy-model-{}".format(sagemaker_timestamp())
 
@@ -108,8 +116,8 @@ def test_deploy_model(
             model_data,
             "SageMakerRole",
             entry_point=MNIST_SCRIPT,
-            framework_version=pytorch_full_version,
-            py_version=pytorch_full_py_version,
+            framework_version=pytorch_inference_latest_version,
+            py_version=pytorch_inference_latest_py_version,
             sagemaker_session=sagemaker_session,
         )
         predictor = model.deploy(1, cpu_instance_type, endpoint_name=endpoint_name)
@@ -122,7 +130,10 @@ def test_deploy_model(
 
 
 def test_deploy_packed_model_with_entry_point_name(
-    sagemaker_session, cpu_instance_type, pytorch_full_version, pytorch_full_py_version
+    sagemaker_session,
+    cpu_instance_type,
+    pytorch_inference_latest_version,
+    pytorch_inference_latest_py_version,
 ):
     endpoint_name = "test-pytorch-deploy-model-{}".format(sagemaker_timestamp())
 
@@ -132,8 +143,8 @@ def test_deploy_packed_model_with_entry_point_name(
             model_data,
             "SageMakerRole",
             entry_point="mnist.py",
-            framework_version=pytorch_full_version,
-            py_version=pytorch_full_py_version,
+            framework_version=pytorch_inference_latest_version,
+            py_version=pytorch_inference_latest_py_version,
             sagemaker_session=sagemaker_session,
         )
         predictor = model.deploy(1, cpu_instance_type, endpoint_name=endpoint_name)
@@ -149,16 +160,19 @@ def test_deploy_packed_model_with_entry_point_name(
     test_region() not in EI_SUPPORTED_REGIONS, reason="EI isn't supported in that specific region."
 )
 def test_deploy_model_with_accelerator(
-    sagemaker_session, cpu_instance_type, pytorch_full_ei_version, pytorch_full_py_version
+    sagemaker_session,
+    cpu_instance_type,
+    pytorch_eia_latest_version,  # same naming as pytorch_{training,inference}_latest_version
+    pytorch_eia_latest_py_version,
 ):
     endpoint_name = "test-pytorch-deploy-eia-{}".format(sagemaker_timestamp())
     model_data = sagemaker_session.upload_data(path=EIA_MODEL)
     pytorch = PyTorchModel(
         model_data,
         "SageMakerRole",
         entry_point=EIA_SCRIPT,
-        framework_version=pytorch_full_ei_version,
-        py_version=pytorch_full_py_version,
+        framework_version=pytorch_eia_latest_version,
+        py_version=pytorch_eia_latest_py_version,
         sagemaker_session=sagemaker_session,
     )
     with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
diff --git a/tests/integ/test_transformer.py b/tests/integ/test_transformer.py
@@ -154,8 +154,8 @@ def test_attach_transform_kmeans(sagemaker_session, cpu_instance_type):
 
 def test_transform_pytorch_vpc_custom_model_bucket(
     sagemaker_session,
-    pytorch_full_version,
-    pytorch_full_py_version,
+    pytorch_inference_latest_version,
+    pytorch_inference_latest_py_version,
     cpu_instance_type,
     custom_bucket_name,
 ):
@@ -174,8 +174,8 @@ def test_transform_pytorch_vpc_custom_model_bucket(
         model_data=model_data,
         entry_point=os.path.join(data_dir, "mnist.py"),
         role="SageMakerRole",
-        framework_version=pytorch_full_version,
-        py_version=pytorch_full_py_version,
+        framework_version=pytorch_inference_latest_version,
+        py_version=pytorch_inference_latest_py_version,
         sagemaker_session=sagemaker_session,
         vpc_config={"Subnets": subnet_ids, "SecurityGroupIds": [security_group_id]},
         code_location="s3://{}".format(custom_bucket_name),
diff --git a/tests/integ/test_tuner.py b/tests/integ/test_tuner.py
@@ -771,7 +771,10 @@ def test_tuning_chainer(
     "This test should be fixed. Details in https://github.com/aws/sagemaker-python-sdk/pull/968"
 )
 def test_attach_tuning_pytorch(
-    sagemaker_session, cpu_instance_type, pytorch_full_version, pytorch_full_py_version
+    sagemaker_session,
+    cpu_instance_type,
+    pytorch_training_latest_version,
+    pytorch_training_latest_py_version,
 ):
     mnist_dir = os.path.join(DATA_DIR, "pytorch_mnist")
     mnist_script = os.path.join(mnist_dir, "mnist.py")
@@ -780,8 +783,8 @@ def test_attach_tuning_pytorch(
         entry_point=mnist_script,
         role="SageMakerRole",
         instance_count=1,
-        framework_version=pytorch_full_version,
-        py_version=pytorch_full_py_version,
+        framework_version=pytorch_training_latest_version,
+        py_version=pytorch_training_latest_py_version,
         instance_type=cpu_instance_type,
         sagemaker_session=sagemaker_session,
     )
diff --git a/tests/unit/sagemaker/image_uris/test_dlc_frameworks.py b/tests/unit/sagemaker/image_uris/test_dlc_frameworks.py
@@ -285,6 +285,96 @@ def _expected_mxnet_inference_uri(
     )
 
 
+def test_pytorch_training(pytorch_training_version, pytorch_training_py_version):
+    _test_image_uris(  # helper not shown in this hunk; arguments are passed positionally
+        "pytorch",
+        pytorch_training_version,
+        pytorch_training_py_version,
+        "training",  # presumably the image scope -- confirm in _test_image_uris
+        _expected_pytorch_training_uri,  # its keyword names match the dict keys below
+        {"pytorch_version": pytorch_training_version, "py_version": pytorch_training_py_version},
+    )
+
+
+def _expected_pytorch_training_uri(pytorch_version, py_version, processor="cpu", region=REGION):
+    version = Version(pytorch_version)
+    if version < Version("1.2"):  # versions below 1.2 expect the "sagemaker-pytorch" repo
+        repo = "sagemaker-pytorch"
+    else:
+        repo = "pytorch-training"
+
+    return expected_uris.framework_uri(
+        repo,
+        pytorch_version,  # the version string as given, not the parsed Version
+        _sagemaker_or_dlc_account(repo, region),  # account depends on the repo name prefix
+        py_version=py_version,
+        processor=processor,
+        region=region,
+    )
+
+
+def test_pytorch_inference(pytorch_inference_version, pytorch_inference_py_version):
+    _test_image_uris(  # helper not shown in this hunk; arguments are passed positionally
+        "pytorch",
+        pytorch_inference_version,
+        pytorch_inference_py_version,
+        "inference",  # presumably the image scope -- confirm in _test_image_uris
+        _expected_pytorch_inference_uri,  # its keyword names match the dict keys below
+        {"pytorch_version": pytorch_inference_version, "py_version": pytorch_inference_py_version},
+    )
+
+
+def _expected_pytorch_inference_uri(pytorch_version, py_version, processor="cpu", region=REGION):
+    version = Version(pytorch_version)
+    if version < Version("1.2"):  # versions below 1.2 expect the "sagemaker-pytorch" repo
+        repo = "sagemaker-pytorch"
+    else:
+        repo = "pytorch-inference"
+
+    return expected_uris.framework_uri(
+        repo,
+        pytorch_version,  # the version string as given, not the parsed Version
+        _sagemaker_or_dlc_account(repo, region),  # account depends on the repo name prefix
+        py_version=py_version,
+        processor=processor,
+        region=region,
+    )
+
+
+def test_pytorch_eia(pytorch_eia_version, pytorch_eia_py_version):
+    base_args = {  # every retrieve() argument except region, which varies below
+        "framework": "pytorch",
+        "version": pytorch_eia_version,
+        "py_version": pytorch_eia_py_version,
+        "image_scope": "inference",
+        "instance_type": "ml.c4.xlarge",
+        "accelerator_type": "ml.eia1.medium",  # expected repo below is "pytorch-inference-eia"
+    }
+
+    uri = image_uris.retrieve(region=REGION, **base_args)
+
+    expected = expected_uris.framework_uri(
+        "pytorch-inference-eia",
+        pytorch_eia_version,
+        DLC_ACCOUNT,
+        py_version=pytorch_eia_py_version,
+        region=REGION,
+    )
+    assert expected == uri
+
+    for region, account in DLC_ALTERNATE_REGION_ACCOUNTS.items():  # repeat per region/account
+        uri = image_uris.retrieve(region=region, **base_args)
+
+        expected = expected_uris.framework_uri(
+            "pytorch-inference-eia",
+            pytorch_eia_version,
+            account,
+            py_version=pytorch_eia_py_version,
+            region=region,
+        )
+        assert expected == uri
+
+
 def _sagemaker_or_dlc_account(repo, region):
     if repo.startswith("sagemaker"):
         return (
diff --git a/tests/unit/sagemaker/image_uris/test_retrieve.py b/tests/unit/sagemaker/image_uris/test_retrieve.py
@@ -127,6 +127,17 @@ def test_retrieve_aliased_version(config_for_framework):
     )
     assert "123412341234.dkr.ecr.us-west-2.amazonaws.com/dummy:{}-cpu-py3".format(version) == uri
 
+    del config["versions"]["1.1.0"]
+    uri = image_uris.retrieve(
+        framework="useless-string",
+        version=version,
+        py_version="py3",
+        instance_type="ml.c4.xlarge",
+        region="us-west-2",
+        image_scope="training",
+    )
+    assert "123412341234.dkr.ecr.us-west-2.amazonaws.com/dummy:{}-cpu-py3".format(version) == uri
+
 
 @patch("sagemaker.image_uris.config_for_framework")
 def test_retrieve_default_version_if_possible(config_for_framework, caplog):
diff --git a/tests/unit/test_pytorch.py b/tests/unit/test_pytorch.py