From e89ecc62d142430abfbff3cab219b64888d8e712 Mon Sep 17 00:00:00 2001 From: Lauren Yu <6631887+laurenyu@users.noreply.github.com> Date: Wed, 15 Jul 2020 17:00:36 -0700 Subject: [PATCH 1/2] change: add PyTorch configuration for image_uris.retrieve() --- src/sagemaker/image_uri_config/pytorch.json | 458 ++++++++++++++++++ src/sagemaker/image_uris.py | 6 +- tests/conftest.py | 38 +- tests/integ/test_airflow_config.py | 9 +- tests/integ/test_git.py | 8 +- tests/integ/test_pytorch.py | 44 +- tests/integ/test_transformer.py | 8 +- tests/integ/test_tuner.py | 9 +- .../image_uris/test_dlc_frameworks.py | 90 ++++ .../sagemaker/image_uris/test_retrieve.py | 11 + tests/unit/test_pytorch.py | 199 ++++---- 11 files changed, 724 insertions(+), 156 deletions(-) create mode 100644 src/sagemaker/image_uri_config/pytorch.json diff --git a/src/sagemaker/image_uri_config/pytorch.json b/src/sagemaker/image_uri_config/pytorch.json new file mode 100644 index 0000000000..584312e4da --- /dev/null +++ b/src/sagemaker/image_uri_config/pytorch.json @@ -0,0 +1,458 @@ +{ + "training": { + "processors": ["cpu", "gpu"], + "version_aliases": { + "0.4": "0.4.0", + "1.0": "1.0.0", + "1.1": "1.1.0", + "1.2": "1.2.0", + "1.3": "1.3.1", + "1.4": "1.4.0", + "1.5": "1.5.0" + }, + "versions": { + "0.4.0": { + "registries": { + "ap-east-1": "057415533634", + "ap-northeast-1": "520713654638", + "ap-northeast-2": "520713654638", + "ap-south-1": "520713654638", + "ap-southeast-1": "520713654638", + "ap-southeast-2": "520713654638", + "ca-central-1": "520713654638", + "cn-north-1": "422961961927", + "cn-northwest-1": "423003514399", + "eu-central-1": "520713654638", + "eu-north-1": "520713654638", + "eu-west-1": "520713654638", + "eu-west-2": "520713654638", + "eu-west-3": "520713654638", + "me-south-1": "724002660598", + "sa-east-1": "520713654638", + "us-east-1": "520713654638", + "us-east-2": "520713654638", + "us-gov-west-1": "246785580436", + "us-iso-east-1": "744548109606", + "us-west-1": 
"520713654638", + "us-west-2": "520713654638" + }, + "repository": "sagemaker-pytorch", + "py_versions": ["py2", "py3"] + }, + "1.0.0": { + "registries": { + "ap-east-1": "057415533634", + "ap-northeast-1": "520713654638", + "ap-northeast-2": "520713654638", + "ap-south-1": "520713654638", + "ap-southeast-1": "520713654638", + "ap-southeast-2": "520713654638", + "ca-central-1": "520713654638", + "cn-north-1": "422961961927", + "cn-northwest-1": "423003514399", + "eu-central-1": "520713654638", + "eu-north-1": "520713654638", + "eu-west-1": "520713654638", + "eu-west-2": "520713654638", + "eu-west-3": "520713654638", + "me-south-1": "724002660598", + "sa-east-1": "520713654638", + "us-east-1": "520713654638", + "us-east-2": "520713654638", + "us-gov-west-1": "246785580436", + "us-iso-east-1": "744548109606", + "us-west-1": "520713654638", + "us-west-2": "520713654638" + }, + "repository": "sagemaker-pytorch", + "py_versions": ["py2", "py3"] + }, + "1.1.0": { + "registries": { + "ap-east-1": "057415533634", + "ap-northeast-1": "520713654638", + "ap-northeast-2": "520713654638", + "ap-south-1": "520713654638", + "ap-southeast-1": "520713654638", + "ap-southeast-2": "520713654638", + "ca-central-1": "520713654638", + "cn-north-1": "422961961927", + "cn-northwest-1": "423003514399", + "eu-central-1": "520713654638", + "eu-north-1": "520713654638", + "eu-west-1": "520713654638", + "eu-west-2": "520713654638", + "eu-west-3": "520713654638", + "me-south-1": "724002660598", + "sa-east-1": "520713654638", + "us-east-1": "520713654638", + "us-east-2": "520713654638", + "us-gov-west-1": "246785580436", + "us-iso-east-1": "744548109606", + "us-west-1": "520713654638", + "us-west-2": "520713654638" + }, + "repository": "sagemaker-pytorch", + "py_versions": ["py2", "py3"] + }, + "1.2.0": { + "registries": { + "ap-east-1": "871362719292", + "ap-northeast-1": "763104351884", + "ap-northeast-2": "763104351884", + "ap-south-1": "763104351884", + "ap-southeast-1": "763104351884", + 
"ap-southeast-2": "763104351884", + "ca-central-1": "763104351884", + "cn-north-1": "727897471807", + "cn-northwest-1": "727897471807", + "eu-central-1": "763104351884", + "eu-north-1": "763104351884", + "eu-west-1": "763104351884", + "eu-west-2": "763104351884", + "eu-west-3": "763104351884", + "me-south-1": "217643126080", + "sa-east-1": "763104351884", + "us-east-1": "763104351884", + "us-east-2": "763104351884", + "us-gov-west-1": "442386744353", + "us-iso-east-1": "886529160074", + "us-west-1": "763104351884", + "us-west-2": "763104351884" + }, + "repository": "pytorch-training", + "py_versions": ["py2", "py3"] + }, + "1.3.1": { + "registries": { + "ap-east-1": "871362719292", + "ap-northeast-1": "763104351884", + "ap-northeast-2": "763104351884", + "ap-south-1": "763104351884", + "ap-southeast-1": "763104351884", + "ap-southeast-2": "763104351884", + "ca-central-1": "763104351884", + "cn-north-1": "727897471807", + "cn-northwest-1": "727897471807", + "eu-central-1": "763104351884", + "eu-north-1": "763104351884", + "eu-west-1": "763104351884", + "eu-west-2": "763104351884", + "eu-west-3": "763104351884", + "me-south-1": "217643126080", + "sa-east-1": "763104351884", + "us-east-1": "763104351884", + "us-east-2": "763104351884", + "us-gov-west-1": "442386744353", + "us-iso-east-1": "886529160074", + "us-west-1": "763104351884", + "us-west-2": "763104351884" + }, + "repository": "pytorch-training", + "py_versions": ["py2", "py3"] + }, + "1.4.0": { + "registries": { + "ap-east-1": "871362719292", + "ap-northeast-1": "763104351884", + "ap-northeast-2": "763104351884", + "ap-south-1": "763104351884", + "ap-southeast-1": "763104351884", + "ap-southeast-2": "763104351884", + "ca-central-1": "763104351884", + "cn-north-1": "727897471807", + "cn-northwest-1": "727897471807", + "eu-central-1": "763104351884", + "eu-north-1": "763104351884", + "eu-west-1": "763104351884", + "eu-west-2": "763104351884", + "eu-west-3": "763104351884", + "me-south-1": "217643126080", + 
"sa-east-1": "763104351884", + "us-east-1": "763104351884", + "us-east-2": "763104351884", + "us-gov-west-1": "442386744353", + "us-iso-east-1": "886529160074", + "us-west-1": "763104351884", + "us-west-2": "763104351884" + }, + "repository": "pytorch-training", + "py_versions": ["py2", "py3"] + }, + "1.5.0": { + "registries": { + "ap-east-1": "871362719292", + "ap-northeast-1": "763104351884", + "ap-northeast-2": "763104351884", + "ap-south-1": "763104351884", + "ap-southeast-1": "763104351884", + "ap-southeast-2": "763104351884", + "ca-central-1": "763104351884", + "cn-north-1": "727897471807", + "cn-northwest-1": "727897471807", + "eu-central-1": "763104351884", + "eu-north-1": "763104351884", + "eu-west-1": "763104351884", + "eu-west-2": "763104351884", + "eu-west-3": "763104351884", + "me-south-1": "217643126080", + "sa-east-1": "763104351884", + "us-east-1": "763104351884", + "us-east-2": "763104351884", + "us-gov-west-1": "442386744353", + "us-iso-east-1": "886529160074", + "us-west-1": "763104351884", + "us-west-2": "763104351884" + }, + "repository": "pytorch-training", + "py_versions": ["py3"] + } + } + }, + "inference": { + "processors": ["cpu", "gpu"], + "version_aliases": { + "0.4": "0.4.0", + "1.0": "1.0.0", + "1.1": "1.1.0", + "1.2": "1.2.0", + "1.3": "1.3.1", + "1.4": "1.4.0", + "1.5": "1.5.0" + }, + "versions": { + "0.4.0": { + "registries": { + "ap-east-1": "057415533634", + "ap-northeast-1": "520713654638", + "ap-northeast-2": "520713654638", + "ap-south-1": "520713654638", + "ap-southeast-1": "520713654638", + "ap-southeast-2": "520713654638", + "ca-central-1": "520713654638", + "cn-north-1": "422961961927", + "cn-northwest-1": "423003514399", + "eu-central-1": "520713654638", + "eu-north-1": "520713654638", + "eu-west-1": "520713654638", + "eu-west-2": "520713654638", + "eu-west-3": "520713654638", + "me-south-1": "724002660598", + "sa-east-1": "520713654638", + "us-east-1": "520713654638", + "us-east-2": "520713654638", + "us-gov-west-1": 
"246785580436", + "us-iso-east-1": "744548109606", + "us-west-1": "520713654638", + "us-west-2": "520713654638" + }, + "repository": "sagemaker-pytorch", + "py_versions": ["py2", "py3"] + }, + "1.0.0": { + "registries": { + "ap-east-1": "057415533634", + "ap-northeast-1": "520713654638", + "ap-northeast-2": "520713654638", + "ap-south-1": "520713654638", + "ap-southeast-1": "520713654638", + "ap-southeast-2": "520713654638", + "ca-central-1": "520713654638", + "cn-north-1": "422961961927", + "cn-northwest-1": "423003514399", + "eu-central-1": "520713654638", + "eu-north-1": "520713654638", + "eu-west-1": "520713654638", + "eu-west-2": "520713654638", + "eu-west-3": "520713654638", + "me-south-1": "724002660598", + "sa-east-1": "520713654638", + "us-east-1": "520713654638", + "us-east-2": "520713654638", + "us-gov-west-1": "246785580436", + "us-iso-east-1": "744548109606", + "us-west-1": "520713654638", + "us-west-2": "520713654638" + }, + "repository": "sagemaker-pytorch", + "py_versions": ["py2", "py3"] + }, + "1.1.0": { + "registries": { + "ap-east-1": "057415533634", + "ap-northeast-1": "520713654638", + "ap-northeast-2": "520713654638", + "ap-south-1": "520713654638", + "ap-southeast-1": "520713654638", + "ap-southeast-2": "520713654638", + "ca-central-1": "520713654638", + "cn-north-1": "422961961927", + "cn-northwest-1": "423003514399", + "eu-central-1": "520713654638", + "eu-north-1": "520713654638", + "eu-west-1": "520713654638", + "eu-west-2": "520713654638", + "eu-west-3": "520713654638", + "me-south-1": "724002660598", + "sa-east-1": "520713654638", + "us-east-1": "520713654638", + "us-east-2": "520713654638", + "us-gov-west-1": "246785580436", + "us-iso-east-1": "744548109606", + "us-west-1": "520713654638", + "us-west-2": "520713654638" + }, + "repository": "sagemaker-pytorch", + "py_versions": ["py2", "py3"] + }, + "1.2.0": { + "registries": { + "ap-east-1": "871362719292", + "ap-northeast-1": "763104351884", + "ap-northeast-2": "763104351884", + 
"ap-south-1": "763104351884", + "ap-southeast-1": "763104351884", + "ap-southeast-2": "763104351884", + "ca-central-1": "763104351884", + "cn-north-1": "727897471807", + "cn-northwest-1": "727897471807", + "eu-central-1": "763104351884", + "eu-north-1": "763104351884", + "eu-west-1": "763104351884", + "eu-west-2": "763104351884", + "eu-west-3": "763104351884", + "me-south-1": "217643126080", + "sa-east-1": "763104351884", + "us-east-1": "763104351884", + "us-east-2": "763104351884", + "us-gov-west-1": "442386744353", + "us-iso-east-1": "886529160074", + "us-west-1": "763104351884", + "us-west-2": "763104351884" + }, + "repository": "pytorch-inference", + "py_versions": ["py2", "py3"] + }, + "1.3.1": { + "registries": { + "ap-east-1": "871362719292", + "ap-northeast-1": "763104351884", + "ap-northeast-2": "763104351884", + "ap-south-1": "763104351884", + "ap-southeast-1": "763104351884", + "ap-southeast-2": "763104351884", + "ca-central-1": "763104351884", + "cn-north-1": "727897471807", + "cn-northwest-1": "727897471807", + "eu-central-1": "763104351884", + "eu-north-1": "763104351884", + "eu-west-1": "763104351884", + "eu-west-2": "763104351884", + "eu-west-3": "763104351884", + "me-south-1": "217643126080", + "sa-east-1": "763104351884", + "us-east-1": "763104351884", + "us-east-2": "763104351884", + "us-gov-west-1": "442386744353", + "us-iso-east-1": "886529160074", + "us-west-1": "763104351884", + "us-west-2": "763104351884" + }, + "repository": "pytorch-inference", + "py_versions": ["py2", "py3"] + }, + "1.4.0": { + "registries": { + "ap-east-1": "871362719292", + "ap-northeast-1": "763104351884", + "ap-northeast-2": "763104351884", + "ap-south-1": "763104351884", + "ap-southeast-1": "763104351884", + "ap-southeast-2": "763104351884", + "ca-central-1": "763104351884", + "cn-north-1": "727897471807", + "cn-northwest-1": "727897471807", + "eu-central-1": "763104351884", + "eu-north-1": "763104351884", + "eu-west-1": "763104351884", + "eu-west-2": "763104351884", 
+ "eu-west-3": "763104351884", + "me-south-1": "217643126080", + "sa-east-1": "763104351884", + "us-east-1": "763104351884", + "us-east-2": "763104351884", + "us-gov-west-1": "442386744353", + "us-iso-east-1": "886529160074", + "us-west-1": "763104351884", + "us-west-2": "763104351884" + }, + "repository": "pytorch-inference", + "py_versions": ["py3"] + }, + "1.5.0": { + "registries": { + "ap-east-1": "871362719292", + "ap-northeast-1": "763104351884", + "ap-northeast-2": "763104351884", + "ap-south-1": "763104351884", + "ap-southeast-1": "763104351884", + "ap-southeast-2": "763104351884", + "ca-central-1": "763104351884", + "cn-north-1": "727897471807", + "cn-northwest-1": "727897471807", + "eu-central-1": "763104351884", + "eu-north-1": "763104351884", + "eu-west-1": "763104351884", + "eu-west-2": "763104351884", + "eu-west-3": "763104351884", + "me-south-1": "217643126080", + "sa-east-1": "763104351884", + "us-east-1": "763104351884", + "us-east-2": "763104351884", + "us-gov-west-1": "442386744353", + "us-iso-east-1": "886529160074", + "us-west-1": "763104351884", + "us-west-2": "763104351884" + }, + "repository": "pytorch-inference", + "py_versions": ["py3"] + } + } + }, + "eia": { + "processors": ["cpu"], + "version_aliases": { + "1.3": "1.3.1" + }, + "versions": { + "1.3.1": { + "registries": { + "ap-east-1": "871362719292", + "ap-northeast-1": "763104351884", + "ap-northeast-2": "763104351884", + "ap-south-1": "763104351884", + "ap-southeast-1": "763104351884", + "ap-southeast-2": "763104351884", + "ca-central-1": "763104351884", + "cn-north-1": "727897471807", + "cn-northwest-1": "727897471807", + "eu-central-1": "763104351884", + "eu-north-1": "763104351884", + "eu-west-1": "763104351884", + "eu-west-2": "763104351884", + "eu-west-3": "763104351884", + "me-south-1": "217643126080", + "sa-east-1": "763104351884", + "us-east-1": "763104351884", + "us-east-2": "763104351884", + "us-gov-west-1": "442386744353", + "us-iso-east-1": "886529160074", + "us-west-1": 
"763104351884", + "us-west-2": "763104351884" + }, + "repository": "pytorch-inference-eia", + "py_versions": ["py3"] + } + } + } +} diff --git a/src/sagemaker/image_uris.py b/src/sagemaker/image_uris.py index ebb234bcb0..ecb29f5b45 100644 --- a/src/sagemaker/image_uris.py +++ b/src/sagemaker/image_uris.py @@ -103,8 +103,9 @@ def config_for_framework(framework): def _validate_version_and_set_if_needed(version, config, framework): """Checks if the framework/algorithm version is one of the supported versions.""" available_versions = list(config["versions"].keys()) + aliased_versions = list(config.get("version_aliases", {}).keys()) - if len(available_versions) == 1: + if len(available_versions) == 1 and version not in aliased_versions: log_message = "Defaulting to the only supported framework/algorithm version: {}.".format( available_versions[0] ) @@ -115,8 +116,7 @@ def _validate_version_and_set_if_needed(version, config, framework): return available_versions[0] - available_versions += list(config.get("version_aliases", {}).keys()) - _validate_arg("{} version".format(framework), version, available_versions) + _validate_arg("{} version".format(framework), version, available_versions + aliased_versions) return version diff --git a/tests/conftest.py b/tests/conftest.py index 9b0e43c83e..7b69e9f631 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -120,14 +120,25 @@ def mxnet_py_version(request): return request.param -@pytest.fixture(scope="module", params=["0.4", "0.4.0", "1.0", "1.0.0"]) -def pytorch_version(request): - return request.param +@pytest.fixture(scope="module", params=["py2", "py3"]) +def pytorch_training_py_version(pytorch_training_version, request): + if Version(pytorch_training_version) < Version("1.5.0"): + return request.param + else: + return "py3" @pytest.fixture(scope="module", params=["py2", "py3"]) -def pytorch_py_version(request): - return request.param +def pytorch_inference_py_version(pytorch_inference_version, request): + if 
Version(pytorch_inference_version) < Version("1.4.0"): + return request.param + else: + return "py3" + + +@pytest.fixture(scope="module") +def pytorch_eia_py_version(): + return "py3" @pytest.fixture(scope="module", params=["0.20.0"]) @@ -176,21 +187,6 @@ def rl_ray_version(request): return request.param -@pytest.fixture(scope="module") -def pytorch_full_version(): - return "1.5.0" - - -@pytest.fixture(scope="module") -def pytorch_full_py_version(): - return "py3" - - -@pytest.fixture(scope="module") -def pytorch_full_ei_version(): - return "1.3.1" - - @pytest.fixture(scope="module") def rl_coach_mxnet_full_version(): return RLEstimator.COACH_LATEST_VERSION_MXNET @@ -314,7 +310,7 @@ def pytest_generate_tests(metafunc): def _generate_all_framework_version_fixtures(metafunc): - for fw in ("chainer", "mxnet", "tensorflow", "xgboost"): + for fw in ("chainer", "mxnet", "pytorch", "tensorflow", "xgboost"): config = image_uris.config_for_framework(fw) if "scope" in config: _parametrize_framework_version_fixtures(metafunc, fw, config) diff --git a/tests/integ/test_airflow_config.py b/tests/integ/test_airflow_config.py index de321bda9d..9d32f240d1 100644 --- a/tests/integ/test_airflow_config.py +++ b/tests/integ/test_airflow_config.py @@ -578,14 +578,17 @@ def test_xgboost_airflow_config_uploads_data_source_to_s3( @pytest.mark.canary_quick def test_pytorch_airflow_config_uploads_data_source_to_s3_when_inputs_not_provided( - sagemaker_session, cpu_instance_type, pytorch_full_version, pytorch_full_py_version + sagemaker_session, + cpu_instance_type, + pytorch_training_latest_version, + pytorch_training_latest_py_version, ): with timeout(seconds=AIRFLOW_CONFIG_TIMEOUT_IN_SECONDS): estimator = PyTorch( entry_point=PYTORCH_MNIST_SCRIPT, role=ROLE, - framework_version=pytorch_full_version, - py_version=pytorch_full_py_version, + framework_version=pytorch_training_latest_version, + py_version=pytorch_training_latest_py_version, instance_count=2, instance_type=cpu_instance_type, 
hyperparameters={"epochs": 6, "backend": "gloo"}, diff --git a/tests/integ/test_git.py b/tests/integ/test_git.py index 8a72bd2d75..bb08f74c4a 100644 --- a/tests/integ/test_git.py +++ b/tests/integ/test_git.py @@ -50,7 +50,9 @@ @pytest.mark.local_mode -def test_github(sagemaker_local_session, pytorch_full_version, pytorch_full_py_version): +def test_github( + sagemaker_local_session, pytorch_training_latest_version, pytorch_training_latest_py_version +): script_path = "mnist.py" git_config = {"repo": GIT_REPO, "branch": BRANCH, "commit": COMMIT} @@ -58,8 +60,8 @@ def test_github(sagemaker_local_session, pytorch_full_version, pytorch_full_py_v entry_point=script_path, role="SageMakerRole", source_dir="pytorch", - framework_version=pytorch_full_version, - py_version=pytorch_full_py_version, + framework_version=pytorch_training_latest_version, + py_version=pytorch_training_latest_py_version, instance_count=1, instance_type="local", sagemaker_session=sagemaker_local_session, diff --git a/tests/integ/test_pytorch.py b/tests/integ/test_pytorch.py index 183a096271..572e01992d 100644 --- a/tests/integ/test_pytorch.py +++ b/tests/integ/test_pytorch.py @@ -38,11 +38,17 @@ @pytest.fixture(scope="module", name="pytorch_training_job") def fixture_training_job( - sagemaker_session, pytorch_full_version, pytorch_full_py_version, cpu_instance_type + sagemaker_session, + pytorch_training_latest_version, + pytorch_training_latest_py_version, + cpu_instance_type, ): with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES): pytorch = _get_pytorch_estimator( - sagemaker_session, pytorch_full_version, pytorch_full_py_version, cpu_instance_type + sagemaker_session, + pytorch_training_latest_version, + pytorch_training_latest_py_version, + cpu_instance_type, ) pytorch.fit({"training": _upload_training_data(pytorch)}) @@ -66,12 +72,14 @@ def test_fit_deploy(pytorch_training_job, sagemaker_session, cpu_instance_type): @pytest.mark.local_mode -def test_local_fit_deploy(sagemaker_local_session, 
pytorch_full_version, pytorch_full_py_version): +def test_local_fit_deploy( + sagemaker_local_session, pytorch_training_latest_version, pytorch_training_latest_py_version +): pytorch = PyTorch( entry_point=MNIST_SCRIPT, role="SageMakerRole", - framework_version=pytorch_full_version, - py_version=pytorch_full_py_version, + framework_version=pytorch_training_latest_version, + py_version=pytorch_training_latest_py_version, instance_count=1, instance_type="local", sagemaker_session=sagemaker_local_session, @@ -94,8 +102,8 @@ def test_deploy_model( pytorch_training_job, sagemaker_session, cpu_instance_type, - pytorch_full_version, - pytorch_full_py_version, + pytorch_inference_latest_version, + pytorch_inference_latest_py_version, ): endpoint_name = "test-pytorch-deploy-model-{}".format(sagemaker_timestamp()) @@ -108,8 +116,8 @@ def test_deploy_model( model_data, "SageMakerRole", entry_point=MNIST_SCRIPT, - framework_version=pytorch_full_version, - py_version=pytorch_full_py_version, + framework_version=pytorch_inference_latest_version, + py_version=pytorch_inference_latest_py_version, sagemaker_session=sagemaker_session, ) predictor = model.deploy(1, cpu_instance_type, endpoint_name=endpoint_name) @@ -122,7 +130,10 @@ def test_deploy_model( def test_deploy_packed_model_with_entry_point_name( - sagemaker_session, cpu_instance_type, pytorch_full_version, pytorch_full_py_version + sagemaker_session, + cpu_instance_type, + pytorch_inference_latest_version, + pytorch_inference_latest_py_version, ): endpoint_name = "test-pytorch-deploy-model-{}".format(sagemaker_timestamp()) @@ -132,8 +143,8 @@ def test_deploy_packed_model_with_entry_point_name( model_data, "SageMakerRole", entry_point="mnist.py", - framework_version=pytorch_full_version, - py_version=pytorch_full_py_version, + framework_version=pytorch_inference_latest_version, + py_version=pytorch_inference_latest_py_version, sagemaker_session=sagemaker_session, ) predictor = model.deploy(1, cpu_instance_type, 
endpoint_name=endpoint_name) @@ -149,7 +160,10 @@ def test_deploy_packed_model_with_entry_point_name( test_region() not in EI_SUPPORTED_REGIONS, reason="EI isn't supported in that specific region." ) def test_deploy_model_with_accelerator( - sagemaker_session, cpu_instance_type, pytorch_full_ei_version, pytorch_full_py_version + sagemaker_session, + cpu_instance_type, + pytorch_eia_latest_version, + pytorch_eia_latest_py_version, ): endpoint_name = "test-pytorch-deploy-eia-{}".format(sagemaker_timestamp()) model_data = sagemaker_session.upload_data(path=EIA_MODEL) @@ -157,8 +171,8 @@ def test_deploy_model_with_accelerator( model_data, "SageMakerRole", entry_point=EIA_SCRIPT, - framework_version=pytorch_full_ei_version, - py_version=pytorch_full_py_version, + framework_version=pytorch_eia_latest_version, + py_version=pytorch_eia_latest_py_version, sagemaker_session=sagemaker_session, ) with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session): diff --git a/tests/integ/test_transformer.py b/tests/integ/test_transformer.py index 75a7fb1b87..aeb7e06792 100644 --- a/tests/integ/test_transformer.py +++ b/tests/integ/test_transformer.py @@ -154,8 +154,8 @@ def test_attach_transform_kmeans(sagemaker_session, cpu_instance_type): def test_transform_pytorch_vpc_custom_model_bucket( sagemaker_session, - pytorch_full_version, - pytorch_full_py_version, + pytorch_inference_latest_version, + pytorch_inference_latest_py_version, cpu_instance_type, custom_bucket_name, ): @@ -174,8 +174,8 @@ def test_transform_pytorch_vpc_custom_model_bucket( model_data=model_data, entry_point=os.path.join(data_dir, "mnist.py"), role="SageMakerRole", - framework_version=pytorch_full_version, - py_version=pytorch_full_py_version, + framework_version=pytorch_inference_latest_version, + py_version=pytorch_inference_latest_py_version, sagemaker_session=sagemaker_session, vpc_config={"Subnets": subnet_ids, "SecurityGroupIds": [security_group_id]}, 
code_location="s3://{}".format(custom_bucket_name), diff --git a/tests/integ/test_tuner.py b/tests/integ/test_tuner.py index 197b095bf8..986ef78b85 100644 --- a/tests/integ/test_tuner.py +++ b/tests/integ/test_tuner.py @@ -771,7 +771,10 @@ def test_tuning_chainer( "This test should be fixed. Details in https://github.com/aws/sagemaker-python-sdk/pull/968" ) def test_attach_tuning_pytorch( - sagemaker_session, cpu_instance_type, pytorch_full_version, pytorch_full_py_version + sagemaker_session, + cpu_instance_type, + pytorch_training_latest_version, + pytorch_training_latest_py_version, ): mnist_dir = os.path.join(DATA_DIR, "pytorch_mnist") mnist_script = os.path.join(mnist_dir, "mnist.py") @@ -780,8 +783,8 @@ def test_attach_tuning_pytorch( entry_point=mnist_script, role="SageMakerRole", instance_count=1, - framework_version=pytorch_full_version, - py_version=pytorch_full_py_version, + framework_version=pytorch_training_latest_version, + py_version=pytorch_training_latest_py_version, instance_type=cpu_instance_type, sagemaker_session=sagemaker_session, ) diff --git a/tests/unit/sagemaker/image_uris/test_dlc_frameworks.py b/tests/unit/sagemaker/image_uris/test_dlc_frameworks.py index be5b5f4e8c..fee1162954 100644 --- a/tests/unit/sagemaker/image_uris/test_dlc_frameworks.py +++ b/tests/unit/sagemaker/image_uris/test_dlc_frameworks.py @@ -285,6 +285,96 @@ def _expected_mxnet_inference_uri( ) +def test_pytorch_training(pytorch_training_version, pytorch_training_py_version): + _test_image_uris( + "pytorch", + pytorch_training_version, + pytorch_training_py_version, + "training", + _expected_pytorch_training_uri, + {"pytorch_version": pytorch_training_version, "py_version": pytorch_training_py_version}, + ) + + +def _expected_pytorch_training_uri(pytorch_version, py_version, processor="cpu", region=REGION): + version = Version(pytorch_version) + if version < Version("1.2"): + repo = "sagemaker-pytorch" + else: + repo = "pytorch-training" + + return 
expected_uris.framework_uri( + repo, + pytorch_version, + _sagemaker_or_dlc_account(repo, region), + py_version=py_version, + processor=processor, + region=region, + ) + + +def test_pytorch_inference(pytorch_inference_version, pytorch_inference_py_version): + _test_image_uris( + "pytorch", + pytorch_inference_version, + pytorch_inference_py_version, + "inference", + _expected_pytorch_inference_uri, + {"pytorch_version": pytorch_inference_version, "py_version": pytorch_inference_py_version}, + ) + + +def _expected_pytorch_inference_uri(pytorch_version, py_version, processor="cpu", region=REGION): + version = Version(pytorch_version) + if version < Version("1.2"): + repo = "sagemaker-pytorch" + else: + repo = "pytorch-inference" + + return expected_uris.framework_uri( + repo, + pytorch_version, + _sagemaker_or_dlc_account(repo, region), + py_version=py_version, + processor=processor, + region=region, + ) + + +def test_pytorch_eia(pytorch_eia_version, pytorch_eia_py_version): + base_args = { + "framework": "pytorch", + "version": pytorch_eia_version, + "py_version": pytorch_eia_py_version, + "image_scope": "inference", + "instance_type": "ml.c4.xlarge", + "accelerator_type": "ml.eia1.medium", + } + + uri = image_uris.retrieve(region=REGION, **base_args) + + expected = expected_uris.framework_uri( + "pytorch-inference-eia", + pytorch_eia_version, + DLC_ACCOUNT, + py_version=pytorch_eia_py_version, + region=REGION, + ) + assert expected == uri + + for region, account in DLC_ALTERNATE_REGION_ACCOUNTS.items(): + uri = image_uris.retrieve(region=region, **base_args) + + expected = expected_uris.framework_uri( + "pytorch-inference-eia", + pytorch_eia_version, + account, + py_version=pytorch_eia_py_version, + region=region, + ) + assert expected == uri + + def _sagemaker_or_dlc_account(repo, region): if repo.startswith("sagemaker"): return ( diff --git a/tests/unit/sagemaker/image_uris/test_retrieve.py b/tests/unit/sagemaker/image_uris/test_retrieve.py index 
c8f10249ac..dd2c1de0f8 100644 --- a/tests/unit/sagemaker/image_uris/test_retrieve.py +++ b/tests/unit/sagemaker/image_uris/test_retrieve.py @@ -127,6 +127,17 @@ def test_retrieve_aliased_version(config_for_framework): ) assert "123412341234.dkr.ecr.us-west-2.amazonaws.com/dummy:{}-cpu-py3".format(version) == uri + del config["versions"]["1.1.0"] + uri = image_uris.retrieve( + framework="useless-string", + version=version, + py_version="py3", + instance_type="ml.c4.xlarge", + region="us-west-2", + image_scope="training", + ) + assert "123412341234.dkr.ecr.us-west-2.amazonaws.com/dummy:{}-cpu-py3".format(version) == uri + @patch("sagemaker.image_uris.config_for_framework") def test_retrieve_default_version_if_possible(config_for_framework, caplog): diff --git a/tests/unit/test_pytorch.py b/tests/unit/test_pytorch.py index ec1c6a5cc6..a6b1014871 100644 --- a/tests/unit/test_pytorch.py +++ b/tests/unit/test_pytorch.py @@ -19,9 +19,9 @@ from mock import ANY, MagicMock, Mock, patch from packaging.version import Version +from sagemaker import image_uris from sagemaker.pytorch import defaults -from sagemaker.pytorch import PyTorch -from sagemaker.pytorch import PyTorchPredictor, PyTorchModel +from sagemaker.pytorch import PyTorch, PyTorchPredictor, PyTorchModel DATA_DIR = os.path.join(os.path.dirname(__file__), "..", "data") @@ -37,7 +37,6 @@ ACCELERATOR_TYPE = "ml.eia.medium" IMAGE_URI = "sagemaker-pytorch" JOB_NAME = "{}-{}".format(IMAGE_URI, TIMESTAMP) -IMAGE_URI_FORMAT_STRING = "520713654638.dkr.ecr.{}.amazonaws.com/{}:{}-{}-{}" ROLE = "Dummy" REGION = "us-west-2" GPU = "ml.p2.xlarge" @@ -80,15 +79,14 @@ def fixture_sagemaker_session(): def _get_full_cpu_image_uri(version, py_version): - return IMAGE_URI_FORMAT_STRING.format(REGION, IMAGE_URI, version, "cpu", py_version) - - -def _get_full_gpu_image_uri(version, py_version): - return IMAGE_URI_FORMAT_STRING.format(REGION, IMAGE_URI, version, "gpu", py_version) - - -def _get_full_cpu_image_uri_with_ei(version, 
py_version): - return _get_full_cpu_image_uri(version, py_version=py_version) + "-eia" + return image_uris.retrieve( + "pytorch", + REGION, + version=version, + py_version=py_version, + instance_type=CPU, + image_scope="training", + ) def _pytorch_estimator( @@ -157,8 +155,24 @@ def _create_train_job(version, py_version): } +def _get_environment(submit_directory, model_url, image_uri): + return { + "Environment": { + "SAGEMAKER_SUBMIT_DIRECTORY": submit_directory, + "SAGEMAKER_PROGRAM": "dummy_script.py", + "SAGEMAKER_ENABLE_CLOUDWATCH_METRICS": "false", + "SAGEMAKER_REGION": "us-west-2", + "SAGEMAKER_CONTAINER_LOG_LEVEL": "20", + }, + "Image": image_uri, + "ModelDataUrl": model_url, + } + + @patch("sagemaker.estimator.name_from_base") -def test_create_model(name_from_base, sagemaker_session, pytorch_version, pytorch_py_version): +def test_create_model( + name_from_base, sagemaker_session, pytorch_inference_version, pytorch_inference_py_version +): container_log_level = '"logging.INFO"' source_dir = "s3://mybucket/source" base_job_name = "job" @@ -169,8 +183,8 @@ def test_create_model(name_from_base, sagemaker_session, pytorch_version, pytorc sagemaker_session=sagemaker_session, instance_count=INSTANCE_COUNT, instance_type=INSTANCE_TYPE, - framework_version=pytorch_version, - py_version=pytorch_py_version, + framework_version=pytorch_inference_version, + py_version=pytorch_inference_py_version, container_log_level=container_log_level, base_job_name=base_job_name, source_dir=source_dir, @@ -183,8 +197,8 @@ def test_create_model(name_from_base, sagemaker_session, pytorch_version, pytorc model = pytorch.create_model() assert model.sagemaker_session == sagemaker_session - assert model.framework_version == pytorch_version - assert model.py_version == pytorch_py_version + assert model.framework_version == pytorch_inference_version + assert model.py_version == pytorch_inference_py_version assert model.entry_point == SCRIPT_PATH assert model.role == ROLE assert model.name 
== model_name @@ -195,14 +209,16 @@ def test_create_model(name_from_base, sagemaker_session, pytorch_version, pytorc name_from_base.assert_called_with(base_job_name) -def test_create_model_with_optional_params(sagemaker_session, pytorch_version, pytorch_py_version): +def test_create_model_with_optional_params( + sagemaker_session, pytorch_inference_version, pytorch_inference_py_version +): container_log_level = '"logging.INFO"' source_dir = "s3://mybucket/source" enable_cloudwatch_metrics = "true" pytorch = PyTorch( entry_point=SCRIPT_PATH, - framework_version=pytorch_version, - py_version=pytorch_py_version, + framework_version=pytorch_inference_version, + py_version=pytorch_inference_py_version, role=ROLE, sagemaker_session=sagemaker_session, instance_count=INSTANCE_COUNT, @@ -272,17 +288,21 @@ def test_create_model_with_custom_image(name_from_base, sagemaker_session): name_from_base.assert_called_with(base_job_name) +@patch("sagemaker.utils.repack_model", MagicMock()) @patch("sagemaker.utils.create_tar_file", MagicMock()) -@patch("time.strftime", return_value=TIMESTAMP) -def test_pytorch(strftime, sagemaker_session, pytorch_version, pytorch_py_version): +@patch("sagemaker.estimator.name_from_base", return_value=JOB_NAME) +def test_pytorch( + name_from_base, sagemaker_session, pytorch_inference_version, pytorch_inference_py_version +): pytorch = PyTorch( entry_point=SCRIPT_PATH, role=ROLE, sagemaker_session=sagemaker_session, instance_count=INSTANCE_COUNT, instance_type=INSTANCE_TYPE, - framework_version=pytorch_version, - py_version=pytorch_py_version, + framework_version=pytorch_inference_version, + py_version=pytorch_inference_py_version, + enable_sagemaker_metrics=False, ) inputs = "s3://mybucket/train" @@ -294,43 +314,45 @@ def test_pytorch(strftime, sagemaker_session, pytorch_version, pytorch_py_versio boto_call_names = [c[0] for c in sagemaker_session.boto_session.method_calls] assert boto_call_names == ["resource"] - expected_train_args = 
_create_train_job(pytorch_version, pytorch_py_version) + expected_train_args = _create_train_job(pytorch_inference_version, pytorch_inference_py_version) expected_train_args["input_config"][0]["DataSource"]["S3DataSource"]["S3Uri"] = inputs expected_train_args["experiment_config"] = EXPERIMENT_CONFIG + expected_train_args["enable_sagemaker_metrics"] = False actual_train_args = sagemaker_session.method_calls[0][2] assert actual_train_args == expected_train_args model = pytorch.create_model() - expected_image_base = "520713654638.dkr.ecr.us-west-2.amazonaws.com/sagemaker-pytorch:{}-gpu-{}" - assert { - "Environment": { - "SAGEMAKER_SUBMIT_DIRECTORY": "s3://mybucket/sagemaker-pytorch-{}/source/sourcedir.tar.gz".format( - TIMESTAMP - ), - "SAGEMAKER_PROGRAM": "dummy_script.py", - "SAGEMAKER_ENABLE_CLOUDWATCH_METRICS": "false", - "SAGEMAKER_REGION": "us-west-2", - "SAGEMAKER_CONTAINER_LOG_LEVEL": "20", - }, - "Image": expected_image_base.format(pytorch_version, pytorch_py_version), - "ModelDataUrl": "s3://m/m.tar.gz", - } == model.prepare_container_def(GPU) + expected_image_uri = image_uris.retrieve( + "pytorch", + REGION, + version=pytorch_inference_version, + py_version=pytorch_inference_py_version, + instance_type=GPU, + image_scope="inference", + ) + + actual_environment = model.prepare_container_def(GPU) + submit_directory = actual_environment["Environment"]["SAGEMAKER_SUBMIT_DIRECTORY"] + model_url = actual_environment["ModelDataUrl"] + expected_environment = _get_environment(submit_directory, model_url, expected_image_uri) + assert actual_environment == expected_environment assert "cpu" in model.prepare_container_def(CPU)["Image"] predictor = pytorch.deploy(1, GPU) assert isinstance(predictor, PyTorchPredictor) +@patch("sagemaker.utils.repack_model", MagicMock()) @patch("sagemaker.utils.create_tar_file", MagicMock()) -def test_model(sagemaker_session, pytorch_version, pytorch_py_version): +def test_model(sagemaker_session, pytorch_inference_version, 
pytorch_inference_py_version): model = PyTorchModel( MODEL_DATA, role=ROLE, entry_point=SCRIPT_PATH, - framework_version=pytorch_version, - py_version=pytorch_py_version, + framework_version=pytorch_inference_version, + py_version=pytorch_inference_py_version, sagemaker_session=sagemaker_session, ) predictor = model.deploy(1, GPU) @@ -404,52 +426,9 @@ def test_model_prepare_container_def_no_instance_type_or_image(): assert expected_msg in str(e) -def test_train_image_default(sagemaker_session, pytorch_version, pytorch_py_version): - pytorch = PyTorch( - entry_point=SCRIPT_PATH, - framework_version=pytorch_version, - py_version=pytorch_py_version, - role=ROLE, - sagemaker_session=sagemaker_session, - instance_count=INSTANCE_COUNT, - instance_type=INSTANCE_TYPE, - ) - - assert _get_full_cpu_image_uri(pytorch_version, pytorch_py_version) in pytorch.train_image() - - -def test_train_image_cpu_instances(sagemaker_session, pytorch_version, pytorch_py_version): - pytorch = _pytorch_estimator( - sagemaker_session, pytorch_version, pytorch_py_version, instance_type="ml.c2.2xlarge" - ) - assert pytorch.train_image() == _get_full_cpu_image_uri(pytorch_version, pytorch_py_version) - - pytorch = _pytorch_estimator( - sagemaker_session, pytorch_version, pytorch_py_version, instance_type="ml.c4.2xlarge" - ) - assert pytorch.train_image() == _get_full_cpu_image_uri(pytorch_version, pytorch_py_version) - - pytorch = _pytorch_estimator( - sagemaker_session, pytorch_version, pytorch_py_version, instance_type="ml.m16" - ) - assert pytorch.train_image() == _get_full_cpu_image_uri(pytorch_version, pytorch_py_version) - - -def test_train_image_gpu_instances(sagemaker_session, pytorch_version, pytorch_py_version): - pytorch = _pytorch_estimator( - sagemaker_session, pytorch_version, pytorch_py_version, instance_type="ml.g2.2xlarge" - ) - assert pytorch.train_image() == _get_full_gpu_image_uri(pytorch_version, pytorch_py_version) - - pytorch = _pytorch_estimator( - sagemaker_session, 
pytorch_version, pytorch_py_version, instance_type="ml.p2.2xlarge" - ) - assert pytorch.train_image() == _get_full_gpu_image_uri(pytorch_version, pytorch_py_version) - - -def test_attach(sagemaker_session, pytorch_version, pytorch_py_version): +def test_attach(sagemaker_session, pytorch_training_version, pytorch_training_py_version): training_image = "1.dkr.ecr.us-west-2.amazonaws.com/sagemaker-pytorch:{}-cpu-{}".format( - pytorch_version, pytorch_py_version + pytorch_training_version, pytorch_training_py_version ) returned_job_description = { "AlgorithmSpecification": {"TrainingInputMode": "File", "TrainingImage": training_image}, @@ -482,8 +461,8 @@ def test_attach(sagemaker_session, pytorch_version, pytorch_py_version): estimator = PyTorch.attach(training_job_name="neo", sagemaker_session=sagemaker_session) assert estimator.latest_training_job.job_name == "neo" - assert estimator.py_version == pytorch_py_version - assert estimator.framework_version == pytorch_version + assert estimator.py_version == pytorch_training_py_version + assert estimator.framework_version == pytorch_training_version assert estimator.role == "arn:aws:iam::366:role/SageMakerRole" assert estimator.instance_count == 1 assert estimator.max_run == 24 * 60 * 60 @@ -571,14 +550,14 @@ def test_attach_custom_image(sagemaker_session): @patch("sagemaker.pytorch.estimator.python_deprecation_warning") -def test_estimator_py2_warning(warning, sagemaker_session, pytorch_version): +def test_estimator_py2_warning(warning, sagemaker_session, pytorch_training_version): estimator = PyTorch( entry_point=SCRIPT_PATH, role=ROLE, sagemaker_session=sagemaker_session, instance_count=INSTANCE_COUNT, instance_type=INSTANCE_TYPE, - framework_version=pytorch_version, + framework_version=pytorch_training_version, py_version="py2", ) @@ -587,53 +566,65 @@ def test_estimator_py2_warning(warning, sagemaker_session, pytorch_version): @patch("sagemaker.pytorch.model.python_deprecation_warning") -def 
test_model_py2_warning(warning, sagemaker_session, pytorch_version): +def test_model_py2_warning(warning, sagemaker_session, pytorch_inference_version): model = PyTorchModel( MODEL_DATA, role=ROLE, entry_point=SCRIPT_PATH, sagemaker_session=sagemaker_session, - framework_version=pytorch_version, + framework_version=pytorch_inference_version, py_version="py2", ) assert model.py_version == "py2" warning.assert_called_with(model.__framework_name__, defaults.LATEST_PY2_VERSION) -def test_pt_enable_sm_metrics(sagemaker_session, pytorch_version, pytorch_py_version): +def test_pt_enable_sm_metrics( + sagemaker_session, pytorch_training_version, pytorch_training_py_version +): pytorch = _pytorch_estimator( sagemaker_session, - framework_version=pytorch_version, - py_version=pytorch_py_version, + framework_version=pytorch_training_version, + py_version=pytorch_training_py_version, enable_sagemaker_metrics=True, ) assert pytorch.enable_sagemaker_metrics -def test_pt_disable_sm_metrics(sagemaker_session, pytorch_version, pytorch_py_version): +def test_pt_disable_sm_metrics( + sagemaker_session, pytorch_training_version, pytorch_training_py_version +): pytorch = _pytorch_estimator( sagemaker_session, - framework_version=pytorch_version, - py_version=pytorch_py_version, + framework_version=pytorch_training_version, + py_version=pytorch_training_py_version, enable_sagemaker_metrics=False, ) assert not pytorch.enable_sagemaker_metrics -def test_pt_default_sm_metrics(sagemaker_session, pytorch_version, pytorch_py_version): +def test_pt_default_sm_metrics( + sagemaker_session, pytorch_training_version, pytorch_training_py_version +): pytorch = _pytorch_estimator( - sagemaker_session, framework_version=pytorch_version, py_version=pytorch_py_version + sagemaker_session, + framework_version=pytorch_training_version, + py_version=pytorch_training_py_version, ) - if Version(pytorch_version) < Version("1.3"): + if Version(pytorch_training_version) < Version("1.3"): assert 
pytorch.enable_sagemaker_metrics is None else: assert pytorch.enable_sagemaker_metrics -def test_custom_image_estimator_deploy(sagemaker_session, pytorch_version, pytorch_py_version): +def test_custom_image_estimator_deploy( + sagemaker_session, pytorch_inference_version, pytorch_inference_py_version +): custom_image = "mycustomimage:latest" pytorch = _pytorch_estimator( - sagemaker_session, framework_version=pytorch_version, py_version=pytorch_py_version + sagemaker_session, + framework_version=pytorch_inference_version, + py_version=pytorch_inference_py_version, ) pytorch.fit(inputs="s3://mybucket/train", job_name="new_name") model = pytorch.create_model(image_uri=custom_image) From 75b46db322596f3e0dfc9438049ce1afc8b9193e Mon Sep 17 00:00:00 2001 From: Lauren Yu <6631887+laurenyu@users.noreply.github.com> Date: Fri, 17 Jul 2020 08:10:11 -0700 Subject: [PATCH 2/2] fix ei fixture --- tests/integ/test_pytorch.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/tests/integ/test_pytorch.py b/tests/integ/test_pytorch.py index 572e01992d..c5ae32677b 100644 --- a/tests/integ/test_pytorch.py +++ b/tests/integ/test_pytorch.py @@ -160,10 +160,7 @@ def test_deploy_packed_model_with_entry_point_name( test_region() not in EI_SUPPORTED_REGIONS, reason="EI isn't supported in that specific region." 
) def test_deploy_model_with_accelerator( - sagemaker_session, - cpu_instance_type, - pytorch_eia_latest_ei_version, - pytorch_eia_latest_py_version, + sagemaker_session, cpu_instance_type, pytorch_eia_latest_version, pytorch_eia_latest_py_version, ): endpoint_name = "test-pytorch-deploy-eia-{}".format(sagemaker_timestamp()) model_data = sagemaker_session.upload_data(path=EIA_MODEL) @@ -171,7 +168,7 @@ def test_deploy_model_with_accelerator( model_data, "SageMakerRole", entry_point=EIA_SCRIPT, - framework_version=pytorch_eia_latest_ei_version, + framework_version=pytorch_eia_latest_version, py_version=pytorch_eia_latest_py_version, sagemaker_session=sagemaker_session, )