diff --git a/src/sagemaker/fw_utils.py b/src/sagemaker/fw_utils.py index 5d380ecf49..1f2cc9214e 100644 --- a/src/sagemaker/fw_utils.py +++ b/src/sagemaker/fw_utils.py @@ -131,6 +131,7 @@ "1.12", "1.12.0", "1.12.1", + "1.13.1", ], } @@ -143,6 +144,7 @@ "1.12", "1.12.0", "1.12.1", + "1.13.1", ] diff --git a/src/sagemaker/image_uri_config/pytorch.json b/src/sagemaker/image_uri_config/pytorch.json index 6853ac528b..c82b656539 100644 --- a/src/sagemaker/image_uri_config/pytorch.json +++ b/src/sagemaker/image_uri_config/pytorch.json @@ -74,7 +74,8 @@ "1.9": "1.9.1", "1.10": "1.10.2", "1.11": "1.11.0", - "1.12": "1.12.1" + "1.12": "1.12.1", + "1.13": "1.13.1" }, "versions": { "0.4.0": { @@ -783,6 +784,42 @@ "us-west-2": "763104351884" }, "repository": "pytorch-inference" + }, + "1.13.1": { + "py_versions": [ + "py39" + ], + "registries": { + "af-south-1": "626614931356", + "ap-east-1": "871362719292", + "ap-northeast-1": "763104351884", + "ap-northeast-2": "763104351884", + "ap-northeast-3": "364406365360", + "ap-south-1": "763104351884", + "ap-southeast-1": "763104351884", + "ap-southeast-2": "763104351884", + "ap-southeast-3": "907027046896", + "ca-central-1": "763104351884", + "cn-north-1": "727897471807", + "cn-northwest-1": "727897471807", + "eu-central-1": "763104351884", + "eu-north-1": "763104351884", + "eu-west-1": "763104351884", + "eu-west-2": "763104351884", + "eu-west-3": "763104351884", + "eu-south-1": "692866216735", + "me-south-1": "217643126080", + "sa-east-1": "763104351884", + "us-east-1": "763104351884", + "us-east-2": "763104351884", + "us-gov-east-1": "446045086412", + "us-gov-west-1": "442386744353", + "us-iso-east-1": "886529160074", + "us-isob-east-1": "094389454867", + "us-west-1": "763104351884", + "us-west-2": "763104351884" + }, + "repository": "pytorch-inference" } } }, @@ -855,7 +892,8 @@ "1.9": "1.9.1", "1.10": "1.10.2", "1.11": "1.11.0", - "1.12": "1.12.1" + "1.12": "1.12.1", + "1.13": "1.13.1" }, "versions": { "0.4.0": { @@ -1520,6 +1558,42 @@ "us-west-2": "763104351884" }, "repository": "pytorch-training" + }, + "1.13.1": { + "py_versions": [ + "py39" + ], + "registries": { + "af-south-1": "626614931356", + "ap-east-1": "871362719292", + "ap-northeast-1": "763104351884", + "ap-northeast-2": "763104351884", + "ap-northeast-3": "364406365360", + "ap-south-1": "763104351884", + "ap-southeast-1": "763104351884", + "ap-southeast-2": "763104351884", + "ap-southeast-3": "907027046896", + "ca-central-1": "763104351884", + "cn-north-1": "727897471807", + "cn-northwest-1": "727897471807", + "eu-central-1": "763104351884", + "eu-north-1": "763104351884", + "eu-west-1": "763104351884", + "eu-west-2": "763104351884", + "eu-west-3": "763104351884", + "eu-south-1": "692866216735", + "me-south-1": "217643126080", + "sa-east-1": "763104351884", + "us-east-1": "763104351884", + "us-east-2": "763104351884", + "us-gov-east-1": "446045086412", + "us-gov-west-1": "442386744353", + "us-iso-east-1": "886529160074", + "us-isob-east-1": "094389454867", + "us-west-1": "763104351884", + "us-west-2": "763104351884" + }, + "repository": "pytorch-training" } } } diff --git a/tests/conftest.py b/tests/conftest.py index f6682ebb8c..208cdcb221 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -86,6 +86,8 @@ "huggingface_training_compiler", ) +PYTORCH_RENEWED_GPU = "ml.g4dn.xlarge" + def pytest_addoption(parser): parser.addoption("--sagemaker-client-config", action="store", default=None) @@ -221,22 +223,26 @@ def mxnet_eia_latest_py_version(): @pytest.fixture(scope="module", params=["py2", "py3"]) def pytorch_training_py_version(pytorch_training_version, request): - if Version(pytorch_training_version) < Version("1.5.0"): - return request.param + if Version(pytorch_training_version) >= Version("1.13"): + return "py39" elif Version(pytorch_training_version) >= Version("1.9"): return "py38" - else: + elif Version(pytorch_training_version) >= Version("1.5.0"): return "py3" + else: + return request.param @pytest.fixture(scope="module", params=["py2", "py3"]) def pytorch_inference_py_version(pytorch_inference_version, request): - if Version(pytorch_inference_version) < Version("1.4.0"): - return request.param + if Version(pytorch_inference_version) >= Version("1.13"): + return "py39" elif Version(pytorch_inference_version) >= Version("1.9"): return "py38" - else: + elif Version(pytorch_inference_version) >= Version("1.4.0"): return "py3" + else: + return request.param @pytest.fixture(scope="module") @@ -252,9 +258,13 @@ def huggingface_pytorch_training_py_version(huggingface_pytorch_training_version @pytest.fixture(scope="module") -def huggingface_training_compiler_pytorch_version(huggingface_training_compiler_version): +def huggingface_training_compiler_pytorch_version( + huggingface_training_compiler_version, +): versions = _huggingface_base_fm_version( - huggingface_training_compiler_version, "pytorch", "huggingface_training_compiler" + huggingface_training_compiler_version, + "pytorch", + "huggingface_training_compiler", ) if not versions: pytest.skip( @@ -265,9 +275,13 @@ def huggingface_training_compiler_pytorch_version(huggingface_training_compiler_ @pytest.fixture(scope="module") -def huggingface_training_compiler_tensorflow_version(huggingface_training_compiler_version): +def huggingface_training_compiler_tensorflow_version( + huggingface_training_compiler_version, +): versions = _huggingface_base_fm_version( - huggingface_training_compiler_version, "tensorflow", "huggingface_training_compiler" + huggingface_training_compiler_version, + "tensorflow", + "huggingface_training_compiler", ) if not versions: pytest.skip( @@ -289,19 +303,25 @@ def huggingface_training_compiler_tensorflow_py_version( @pytest.fixture(scope="module") -def huggingface_training_compiler_pytorch_py_version(huggingface_training_compiler_pytorch_version): +def huggingface_training_compiler_pytorch_py_version( + huggingface_training_compiler_pytorch_version, +): return "py38" @pytest.fixture(scope="module") -def huggingface_pytorch_latest_training_py_version(huggingface_training_pytorch_latest_version): +def huggingface_pytorch_latest_training_py_version( + huggingface_training_pytorch_latest_version, +): return ( "py38" if Version(huggingface_training_pytorch_latest_version) >= Version("1.9") else "py36" ) @pytest.fixture(scope="module") -def huggingface_pytorch_latest_inference_py_version(huggingface_inference_pytorch_latest_version): +def huggingface_pytorch_latest_inference_py_version( + huggingface_inference_pytorch_latest_version, +): return ( "py38" if Version(huggingface_inference_pytorch_latest_version) >= Version("1.9") @@ -477,7 +497,8 @@ def pytorch_ddp_py_version(): @pytest.fixture( - scope="module", params=["1.10", "1.10.0", "1.10.2", "1.11", "1.11.0", "1.12", "1.12.0"] + scope="module", + params=["1.10", "1.10.0", "1.10.2", "1.11", "1.11.0", "1.12", "1.12.0"], ) def pytorch_ddp_framework_version(request): return request.param @@ -511,6 +532,23 @@ def gpu_instance_type(sagemaker_session, request): return "ml.p3.2xlarge" +@pytest.fixture() +def gpu_pytorch_instance_type(sagemaker_session, request): + if "pytorch_inference_version" in request.fixturenames: + fw_version = request.getfixturevalue("pytorch_inference_version") + else: + fw_version = request.param + + region = sagemaker_session.boto_session.region_name + if region in NO_P3_REGIONS: + if Version(fw_version) >= Version("1.13"): + return PYTORCH_RENEWED_GPU + else: + return "ml.p2.xlarge" + else: + return "ml.p3.2xlarge" + + @pytest.fixture(scope="session") def gpu_instance_type_list(sagemaker_session, request): region = sagemaker_session.boto_session.region_name diff --git a/tests/unit/sagemaker/image_uris/test_dlc_frameworks.py b/tests/unit/sagemaker/image_uris/test_dlc_frameworks.py index 38638aa472..0f00f7eb22 100644 --- a/tests/unit/sagemaker/image_uris/test_dlc_frameworks.py +++ b/tests/unit/sagemaker/image_uris/test_dlc_frameworks.py @@ -18,6 +18,7 @@ from tests.unit.sagemaker.image_uris import expected_uris INSTANCE_TYPES_AND_PROCESSORS = (("ml.c4.xlarge", "cpu"), ("ml.p2.xlarge", "gpu")) +RENEWED_PYTORCH_INSTANCE_TYPES_AND_PROCESSORS = (("ml.c4.xlarge", "cpu"), ("ml.g4dn.xlarge", "gpu")) REGION = "us-west-2" DLC_ACCOUNT = "763104351884" @@ -70,7 +71,12 @@ def _test_image_uris( "image_scope": scope, } - for instance_type, processor in INSTANCE_TYPES_AND_PROCESSORS: + TYPES_AND_PROCESSORS = INSTANCE_TYPES_AND_PROCESSORS + if framework == "pytorch" and Version(fw_version) >= Version("1.13"): + """Handle P2 deprecation""" + TYPES_AND_PROCESSORS = RENEWED_PYTORCH_INSTANCE_TYPES_AND_PROCESSORS + + for instance_type, processor in TYPES_AND_PROCESSORS: uri = image_uris.retrieve(region=REGION, instance_type=instance_type, **base_args) expected = expected_fn(processor=processor, **expected_fn_args) diff --git a/tests/unit/test_fw_utils.py b/tests/unit/test_fw_utils.py index 13ba99d665..8645f05159 100644 --- a/tests/unit/test_fw_utils.py +++ b/tests/unit/test_fw_utils.py @@ -912,6 +912,7 @@ def test_validate_smdataparallel_args_not_raises(): ("ml.p3.16xlarge", "pytorch", "1.12.0", "py38", smdataparallel_enabled), ("ml.p3.16xlarge", "pytorch", "1.12.1", "py38", smdataparallel_enabled), ("ml.p3.16xlarge", "pytorch", "1.12", "py38", smdataparallel_enabled), + ("ml.p3.16xlarge", "pytorch", "1.13.1", "py39", smdataparallel_enabled), ("ml.p3.16xlarge", "tensorflow", "2.4.1", "py3", smdataparallel_enabled_custom_mpi), ("ml.p3.16xlarge", "tensorflow", "2.4.1", "py37", smdataparallel_enabled_custom_mpi), ("ml.p3.16xlarge", "tensorflow", "2.4.3", "py3", smdataparallel_enabled_custom_mpi), @@ -932,6 +933,7 @@ def test_validate_smdataparallel_args_not_raises(): ("ml.p3.16xlarge", "pytorch", "1.11.0", "py38", smdataparallel_enabled_custom_mpi), ("ml.p3.16xlarge", "pytorch", "1.12.0", "py38", smdataparallel_enabled_custom_mpi), ("ml.p3.16xlarge", "pytorch", "1.12.1", "py38", smdataparallel_enabled_custom_mpi), + ("ml.p3.16xlarge", "pytorch", "1.13.1", "py39", smdataparallel_enabled_custom_mpi), ] for instance_type, framework_name, framework_version, py_version, distribution in good_args: fw_utils._validate_smdataparallel_args( diff --git a/tests/unit/test_pytorch.py b/tests/unit/test_pytorch.py index a11738e25c..30b1251219 100644 --- a/tests/unit/test_pytorch.py +++ b/tests/unit/test_pytorch.py @@ -302,7 +302,12 @@ def test_create_model_with_custom_image(name_from_base, sagemaker_session): @patch("sagemaker.estimator.name_from_base", return_value=JOB_NAME) @patch("time.time", return_value=TIME) def test_pytorch( - time, name_from_base, sagemaker_session, pytorch_inference_version, pytorch_inference_py_version + time, + name_from_base, + sagemaker_session, + pytorch_inference_version, + pytorch_inference_py_version, + gpu_pytorch_instance_type, ): pytorch = PyTorch( entry_point=SCRIPT_PATH, @@ -339,24 +344,29 @@ def test_pytorch( REGION, version=pytorch_inference_version, py_version=pytorch_inference_py_version, - instance_type=GPU, + instance_type=gpu_pytorch_instance_type, image_scope="inference", ) - actual_environment = model.prepare_container_def(GPU) + actual_environment = model.prepare_container_def(gpu_pytorch_instance_type) submit_directory = actual_environment["Environment"]["SAGEMAKER_SUBMIT_DIRECTORY"] model_url = actual_environment["ModelDataUrl"] expected_environment = _get_environment(submit_directory, model_url, expected_image_uri) assert actual_environment == expected_environment assert "cpu" in model.prepare_container_def(CPU)["Image"] - predictor = pytorch.deploy(1, GPU) + predictor = pytorch.deploy(1, gpu_pytorch_instance_type) assert isinstance(predictor, PyTorchPredictor) @patch("sagemaker.utils.repack_model", MagicMock()) @patch("sagemaker.utils.create_tar_file", MagicMock()) -def test_model(sagemaker_session, pytorch_inference_version, pytorch_inference_py_version): +def test_model( + sagemaker_session, + pytorch_inference_version, + pytorch_inference_py_version, + gpu_pytorch_instance_type, +): model = PyTorchModel( MODEL_DATA, role=ROLE, @@ -365,13 +375,14 @@ def test_model(sagemaker_session, pytorch_inference_version, pytorch_inference_p py_version=pytorch_inference_py_version, sagemaker_session=sagemaker_session, ) - predictor = model.deploy(1, GPU) + predictor = model.deploy(1, gpu_pytorch_instance_type) assert isinstance(predictor, PyTorchPredictor) @patch("sagemaker.utils.create_tar_file", MagicMock()) @patch("sagemaker.utils.repack_model") -def test_mms_model(repack_model, sagemaker_session): +@pytest.mark.parametrize("gpu_pytorch_instance_type", ["1.2"], indirect=True) +def test_mms_model(repack_model, sagemaker_session, gpu_pytorch_instance_type): PyTorchModel( MODEL_DATA, role=ROLE, @@ -379,7 +390,7 @@ def test_mms_model(repack_model, sagemaker_session): sagemaker_session=sagemaker_session, framework_version="1.2", py_version="py3", - ).deploy(1, GPU) + ).deploy(1, gpu_pytorch_instance_type) repack_model.assert_called_with( dependencies=[], @@ -428,6 +439,7 @@ def test_model_custom_serialization( sagemaker_session, pytorch_inference_version, pytorch_inference_py_version, + gpu_pytorch_instance_type, ): model = PyTorchModel( MODEL_DATA, @@ -441,7 +453,7 @@ def test_model_custom_serialization( custom_deserializer = Mock() predictor = model.deploy( 1, - GPU, + gpu_pytorch_instance_type, serializer=custom_serializer, deserializer=custom_deserializer, )