# Inclusive ``[lowest, highest]`` framework versions accepted for Inferentia,
# keyed by the "neo-"-prefixed framework name.
INFERENTIA_VERSION_RANGES = {
    "neo-mxnet": [[1, 5, 1], [1, 5, 1]],
    "neo-tensorflow": [[1, 15, 0], [1, 15, 0]],
}

INFERENTIA_SUPPORTED_REGIONS = ["us-east-1", "us-west-2"]


def is_version_equal_or_lower(highest_version, framework_version):
    """Determine whether ``framework_version`` is at most ``highest_version``.

    Only the components that ``framework_version`` supplies are compared:
    ``highest_version`` is truncated to the same length first, so ``"1.15"``
    is accepted against ``[1, 15, 0]``.

    Args:
        highest_version (List[int]): highest version represented in an integer
            list
        framework_version (str): framework version string, with components
            separated by "."

    Returns:
        bool: Whether or not ``framework_version`` is equal to or lower than
            ``highest_version``

    Raises:
        ValueError: If a component of ``framework_version`` is not an integer.
    """
    version_list = [int(s) for s in framework_version.split(".")]
    return version_list <= highest_version[0 : len(version_list)]


def _is_inferentia_supported(framework, framework_version):
    """Return if Inferentia supports the framework and its version.

    Args:
        framework (str): The framework name as used for the keys of
            ``INFERENTIA_VERSION_RANGES``, e.g. "neo-tensorflow"
        framework_version (str): The framework version

    Returns:
        bool: Whether or not Inferentia supports the framework and its version.
            ``False`` when ``framework`` has no entry in
            ``INFERENTIA_VERSION_RANGES``.
    """
    # Single lookup; an unknown framework is simply "not supported" rather
    # than a TypeError from subscripting None.
    version_range = INFERENTIA_VERSION_RANGES.get(framework)
    if version_range is None:
        return False

    lowest_version_list, highest_version_list = version_range
    return is_version_equal_or_higher(
        lowest_version_list, framework_version
    ) and is_version_equal_or_lower(highest_version_list, framework_version)
def _inferentia_image(self, region, target_instance_type, framework, framework_version):
    """Build the image URI used to serve a compiled model on Inferentia.

    Args:
        region (str): AWS region; passed to ``fw_utils.create_image_uri`` and
            to ``self._neo_image_account`` to pick the account.
        target_instance_type (str): Instance identifier written with
            underscores (e.g. ``ml_inf1``); underscores are replaced by dots
            before the lookup.
        framework (str): Framework name; it is lower-cased and prefixed with
            ``neo-``.
        framework_version (str): Framework version.

    Returns:
        The value returned by ``fw_utils.create_image_uri`` for the
        ``py3`` image.
    """
    neo_framework = "neo-" + framework.lower()
    dotted_instance_type = target_instance_type.replace("_", ".")
    neo_account = self._neo_image_account(region)
    return fw_utils.create_image_uri(
        region,
        neo_framework,
        dotted_instance_type,
        framework_version,
        py_version="py3",
        account=neo_account,
    )
@pytest.fixture(scope="session")
def inf_instance_type(sagemaker_session, request):
    """Session-scoped fixture returning the Inferentia instance type for tests."""
    return "ml.inf1.xlarge"


@pytest.fixture(scope="session")
def inf_instance_family(inf_instance_type):
    """Session-scoped fixture deriving the instance family from the type.

    Joins the first two dot-separated parts of ``inf_instance_type`` with
    ``_`` (``ml.inf1.xlarge`` becomes ``ml_inf1``).
    """
    leading_parts = inf_instance_type.split(".")[:2]
    return "_".join(leading_parts)
# --- tests/integ/test_neo_mxnet.py ---------------------------------------
INF_MXNET_VERSION = "1.5.1"


@pytest.mark.skip(reason="Inferentia is not supported yet.")
def test_inferentia_deploy_model(
    mxnet_training_job, sagemaker_session, inf_instance_type, inf_instance_family
):
    """Compile an MXNet model for Inferentia, deploy it and run one prediction."""
    endpoint_name = unique_name_from_base("test-neo-deploy-model")

    # NOTE(review): the flattened diff loses indentation; the whole flow is
    # assumed to run inside the endpoint-cleanup context manager -- confirm.
    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        desc = sagemaker_session.sagemaker_client.describe_training_job(
            TrainingJobName=mxnet_training_job
        )
        model_data = desc["ModelArtifacts"]["S3ModelArtifacts"]
        script_path = os.path.join(DATA_DIR, "mxnet_mnist", "mnist_neo.py")
        role = "SageMakerRole"
        model = MXNetModel(
            model_data,
            role,
            entry_point=script_path,
            framework_version=INF_MXNET_VERSION,
            sagemaker_session=sagemaker_session,
        )

        model.compile(
            target_instance_family=inf_instance_family,
            input_shape={"data": [1, 1, 28, 28]},
            role=role,
            job_name=unique_name_from_base("test-deploy-model-compilation-job"),
            output_path="/".join(model_data.split("/")[:-1]),
        )
        predictor = model.deploy(1, inf_instance_type, endpoint_name=endpoint_name)

        predictor.content_type = "application/vnd+python.numpy+binary"
        data = numpy.zeros(shape=(1, 1, 28, 28))
        predictor.predict(data)


# --- tests/unit/test_fw_utils.py ------------------------------------------
def test_valid_inferentia_image():
    """A supported region/framework/version yields the ``-inf-py3`` image URI."""
    image_uri = fw_utils.create_image_uri(
        REGION,
        "neo-tensorflow",
        "ml.inf1.2xlarge",
        "1.15.0",
        py_version="py3",
        account=MOCK_ACCOUNT,
    )
    assert (
        image_uri
        == "{}.dkr.ecr.{}.amazonaws.com/sagemaker-neo-tensorflow:1.15.0-inf-py3".format(
            MOCK_ACCOUNT, REGION
        )
    )


def test_invalid_inferentia_region():
    """A region outside the Inferentia list is rejected with ``ValueError``."""
    with pytest.raises(ValueError) as e:
        fw_utils.create_image_uri(
            "ap-south-1",
            "neo-tensorflow",
            "ml.inf1.2xlarge",
            "1.15.0",
            py_version="py3",
            account=MOCK_ACCOUNT,
        )
    # Check the exception message itself, not the ExceptionInfo wrapper.
    assert "Inferentia is not supported in region ap-south-1." in str(e.value)


def test_inferentia_invalid_framework():
    """A framework without an Inferentia version range is rejected."""
    with pytest.raises(ValueError) as e:
        fw_utils.create_image_uri(
            REGION,
            "neo-pytorch",
            "ml.inf1.2xlarge",
            "1.4.0",
            py_version="py3",
            account=MOCK_ACCOUNT,
        )
    assert "Inferentia does not support pytorch." in str(e.value)


def test_invalid_inferentia_framework_version():
    """A framework version outside the supported range is rejected."""
    with pytest.raises(ValueError) as e:
        fw_utils.create_image_uri(
            REGION,
            "neo-tensorflow",
            "ml.inf1.2xlarge",
            "1.15.2",
            py_version="py3",
            account=MOCK_ACCOUNT,
        )
    assert "Inferentia is not supported with tensorflow version 1.15.2." in str(e.value)


# --- tests/unit/test_model.py ----------------------------------------------
NEO_REGION_ACCOUNT = "301217895009"


def test_compile_model_for_inferentia(sagemaker_session, tmpdir):
    """Compiling for an ``ml_inf`` family sets the Inferentia image and the compiled flag."""
    sagemaker_session.wait_for_compilation_job = Mock(
        return_value=DESCRIBE_COMPILATION_JOB_RESPONSE
    )
    model = DummyFrameworkModel(sagemaker_session, source_dir=str(tmpdir))
    model.compile(
        target_instance_family="ml_inf",
        input_shape={"data": [1, 3, 1024, 1024]},
        output_path="s3://output",
        role="role",
        framework="tensorflow",
        framework_version="1.15.0",
        job_name="compile-model",
    )
    assert (
        "{}.dkr.ecr.{}.amazonaws.com/sagemaker-neo-tensorflow:1.15.0-inf-py3".format(
            NEO_REGION_ACCOUNT, REGION
        )
        == model.image
    )
    assert model._is_compiled_model is True