feature: inferentia support #1373

Merged: 5 commits, Mar 31, 2020
64 changes: 64 additions & 0 deletions src/sagemaker/fw_utils.py
@@ -104,6 +104,13 @@
"pytorch-serving-eia": [1, 3, 1],
}

INFERENTIA_VERSION_RANGES = {
    "neo-mxnet": [[1, 5, 1], [1, 5, 1]],
    "neo-tensorflow": [[1, 15, 0], [1, 15, 0]],
}

Review comment:

Why do we need to introduce the upper bound and is_version_equal_or_lower? DLCs only have a lowest version. Should we be consistent here? Is it because Neo/Inferentia doesn't support the latest version? EIA doesn't support the latest version either. I am okay with introducing a new map here, but maybe we should move EI's validation into the new map as well? The downside of this approach is that we have to keep updating the upper bound every time Neo/EI supports a new version. We should add a comment here along the lines of "Once Neo/EI has caught up, move them to the MERGED_FRAMEWORKS_LOWEST_VERSIONS map." What do you think?

Contributor Author:

The reason I introduced this upper limit is that Inferentia does not support the latest framework versions, and the Inferentia containers do not align closely with the DLC versions.

We currently have TensorFlow up to 2.1, but Inferentia is at 1.15.0, three versions behind (1.15.2, 2.0, 2.1). And for MXNet, the DLCs skipped 1.5.0 but Inferentia only has 1.5.0. Plus, the Inferentia images are Neo containers and follow the Neo image naming schema, so I think it's better for them to have their own logic.

Also, since the Inferentia images are Neo containers, we should not move them into MERGED_FRAMEWORKS_LOWEST_VERSIONS, because they are not merged DLC containers.

INFERENTIA_SUPPORTED_REGIONS = ["us-east-1", "us-west-2"]

DEBUGGER_UNSUPPORTED_REGIONS = ["us-gov-west-1", "us-iso-east-1"]


@@ -124,6 +131,23 @@ def is_version_equal_or_higher(lowest_version, framework_version):
return version_list >= lowest_version[0 : len(version_list)]


def is_version_equal_or_lower(highest_version, framework_version):
"""Determine whether the ``framework_version`` is equal to or lower than
``highest_version``

Args:
highest_version (List[int]): highest version represented in an integer
list
framework_version (str): framework version string

Returns:
bool: Whether or not ``framework_version`` is equal to or lower than
``highest_version``
"""
version_list = [int(s) for s in framework_version.split(".")]
return version_list <= highest_version[0 : len(version_list)]
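
A note on the comparison semantics (illustrative, not part of the diff): only as many version components as the caller supplies are compared, because the stored bound is sliced to the parsed version's length:

    # Prefix comparison: a two-component input is checked against only the
    # first two components of the stored bound.
    assert is_version_equal_or_lower([1, 15, 0], "1.15")        # [1, 15] <= [1, 15]
    assert not is_version_equal_or_lower([1, 15, 0], "1.15.2")  # [1, 15, 2] > [1, 15, 0]
    assert is_version_equal_or_higher([1, 5, 1], "1.5")         # [1, 5] >= [1, 5]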


def _is_dlc_version(framework, framework_version, py_version):
"""Return if the framework's version uses the corresponding DLC image.

@@ -144,6 +168,23 @@ def _is_dlc_version(framework, framework_version, py_version):
return False


def _is_inferentia_supported(framework, framework_version):
"""Return if Inferentia supports the framework and its version.

Args:
framework (str): The framework name, e.g. "tensorflow"
framework_version (str): The framework version

Returns:
bool: Whether or not Inferentia supports the framework and its version.
"""
lowest_version_list = INFERENTIA_VERSION_RANGES.get(framework)[0]
highest_version_list = INFERENTIA_VERSION_RANGES.get(framework)[1]
return is_version_equal_or_higher(
lowest_version_list, framework_version
) and is_version_equal_or_lower(highest_version_list, framework_version)
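
Combined with the ranges defined above, this is an inclusive bounds check; a quick sketch of the resulting behavior (illustrative, not part of the diff):

    assert _is_inferentia_supported("neo-tensorflow", "1.15.0")      # exactly the pinned version
    assert not _is_inferentia_supported("neo-tensorflow", "1.15.2")  # above the upper bound
    assert not _is_inferentia_supported("neo-mxnet", "1.4.1")        # below the lower bound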


def _registry_id(region, framework, py_version, account, framework_version):
"""Return the Amazon ECR registry number (or AWS account ID) for
the given framework, framework version, Python version, and region.
@@ -240,11 +281,34 @@ def create_image_uri(
# 'cpu' or 'gpu'.
if family in optimized_families:
device_type = family
elif family.startswith("inf"):
device_type = "inf"
elif family[0] in ["g", "p"]:
device_type = "gpu"
else:
device_type = "cpu"

if device_type == "inf":
if region not in INFERENTIA_SUPPORTED_REGIONS:
raise ValueError(
"Inferentia is not supported in region {}. Supported regions are {}".format(
region, ", ".join(INFERENTIA_SUPPORTED_REGIONS)
)
)
if framework not in INFERENTIA_VERSION_RANGES:
raise ValueError(
"Inferentia does not support {}. Currently it supports "
"MXNet and TensorFlow with more frameworks coming soon.".format(
framework.split("-")[-1]
)
)
if not _is_inferentia_supported(framework, framework_version):
raise ValueError(
"Inferentia is not supported with {} version {}.".format(
framework.split("-")[-1], framework_version
)
)

use_dlc_image = _is_dlc_version(framework, framework_version, py_version)

if not py_version or (use_dlc_image and framework == "tensorflow-serving-eia"):
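As a side note on the device-type chain added to create_image_uri above: the instance family (presumably the middle token of an instance type such as "ml.inf1.2xlarge") is mapped to a device suffix. A condensed, standalone mirror of that chain, for illustration only:

    def _device_type(family, optimized_families=()):
        # Illustrative mirror of the chain in create_image_uri; not part of the SDK.
        if family in optimized_families:
            return family              # Neo-optimized families pass through unchanged
        if family.startswith("inf"):
            return "inf"               # Inferentia families, e.g. "inf1"
        if family[0] in ("g", "p"):
            return "gpu"               # e.g. "g4dn", "p3"
        return "cpu"                   # everything else, e.g. "c5", "m4"

    assert _device_type("inf1") == "inf"
    assert _device_type("p3") == "gpu"
    assert _device_type("m4") == "cpu"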
32 changes: 32 additions & 0 deletions src/sagemaker/model.py
@@ -50,6 +50,8 @@
"us-gov-west-1": "263933020539",
}

INFERENTIA_INSTANCE_PREFIX = "ml_inf"


class Model(object):
"""A SageMaker ``Model`` that can be deployed to an ``Endpoint``."""
@@ -286,6 +288,23 @@ def _neo_image(self, region, target_instance_type, framework, framework_version)
account=self._neo_image_account(region),
)

def _inferentia_image(self, region, target_instance_type, framework, framework_version):
    """Build the URI of the Neo Inferentia serving image.

    Args:
        region (str): AWS region of the image.
        target_instance_type (str): Target instance family in underscore form,
            e.g. "ml_inf1".
        framework (str): Framework name, e.g. "tensorflow" or "mxnet".
        framework_version (str): Framework version string.
    """
return fw_utils.create_image_uri(
region,
"neo-" + framework.lower(),
target_instance_type.replace("_", "."),
framework_version,
py_version="py3",
account=self._neo_image_account(region),
)
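
Based on the fw_utils changes above and the URI asserted in the unit tests below, a call like the following should resolve roughly as shown (the account placeholder is illustrative; the real value comes from _neo_image_account(region)):

    image = model._inferentia_image("us-west-2", "ml_inf1", "tensorflow", "1.15.0")
    # "ml_inf1" -> "ml.inf1", "tensorflow" -> "neo-tensorflow", producing:
    # "<neo-account>.dkr.ecr.us-west-2.amazonaws.com/sagemaker-neo-tensorflow:1.15.0-inf-py3"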

def compile(
self,
target_instance_family,
@@ -364,6 +383,14 @@ def compile(
framework_version,
)
self._is_compiled_model = True
elif target_instance_family.startswith(INFERENTIA_INSTANCE_PREFIX):
self.image = self._inferentia_image(
self.sagemaker_session.boto_region_name,
target_instance_family,
framework,
framework_version,
)
self._is_compiled_model = True
else:
LOGGER.warning(
"The instance type %s is not supported to deploy via SageMaker,"
@@ -437,6 +464,11 @@ def deploy(
if self.role is None:
raise ValueError("Role can not be null for deploying a model")

if instance_type.startswith("ml.inf") and not self._is_compiled_model:
LOGGER.warning(
"Your model is not compiled. Please compile your model before using Inferentia."
)

compiled_model_suffix = "-".join(instance_type.split(".")[:-1])
if self._is_compiled_model:
name_prefix = self.name or utils.name_from_image(self.image)
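Taken together, the model.py changes support a compile-then-deploy flow like the one in the integration test below; a condensed sketch (bucket, role, and entry point are placeholders):

    model = MXNetModel(model_data, "SageMakerRole", entry_point="mnist_neo.py",
                       framework_version="1.5.1", sagemaker_session=session)
    model.compile(
        target_instance_family="ml_inf1",      # matches INFERENTIA_INSTANCE_PREFIX
        input_shape={"data": [1, 1, 28, 28]},
        role="SageMakerRole",
        output_path="s3://my-bucket/output",
    )
    predictor = model.deploy(1, "ml.inf1.xlarge")  # compiled first, so no warning is logged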
10 changes: 10 additions & 0 deletions tests/conftest.py
@@ -269,6 +269,11 @@ def cpu_instance_type(sagemaker_session, request):
return "ml.m4.xlarge"


@pytest.fixture(scope="session")
def inf_instance_type(sagemaker_session, request):
return "ml.inf1.xlarge"


@pytest.fixture(scope="session")
def ec2_instance_type(cpu_instance_type):
return cpu_instance_type[3:]
@@ -289,6 +294,11 @@ def cpu_instance_family(cpu_instance_type):
return "_".join(cpu_instance_type.split(".")[0:2])


@pytest.fixture(scope="session")
def inf_instance_family(inf_instance_type):
return "_".join(inf_instance_type.split(".")[0:2])


def pytest_generate_tests(metafunc):
if "instance_type" in metafunc.fixturenames:
boto_config = metafunc.config.getoption("--boto-config")
36 changes: 36 additions & 0 deletions tests/integ/test_neo_mxnet.py
@@ -24,6 +24,7 @@
from tests.integ.timeout import timeout, timeout_and_delete_endpoint_by_name

NEO_MXNET_VERSION = "1.4.1" # Neo doesn't support MXNet 1.6 yet.
INF_MXNET_VERSION = "1.5.1"


@pytest.fixture(scope="module")
@@ -110,3 +111,38 @@ def test_deploy_model(
predictor.content_type = "application/vnd+python.numpy+binary"
data = numpy.zeros(shape=(1, 1, 28, 28))
predictor.predict(data)


@pytest.mark.skip(reason="Inferentia is not supported yet.")
def test_inferentia_deploy_model(
mxnet_training_job, sagemaker_session, inf_instance_type, inf_instance_family
):
endpoint_name = unique_name_from_base("test-neo-deploy-model")

with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
desc = sagemaker_session.sagemaker_client.describe_training_job(
TrainingJobName=mxnet_training_job
)
model_data = desc["ModelArtifacts"]["S3ModelArtifacts"]
script_path = os.path.join(DATA_DIR, "mxnet_mnist", "mnist_neo.py")
role = "SageMakerRole"
model = MXNetModel(
model_data,
role,
entry_point=script_path,
framework_version=INF_MXNET_VERSION,
sagemaker_session=sagemaker_session,
)

model.compile(
target_instance_family=inf_instance_family,
input_shape={"data": [1, 1, 28, 28]},
role=role,
job_name=unique_name_from_base("test-deploy-model-compilation-job"),
output_path="/".join(model_data.split("/")[:-1]),
)
predictor = model.deploy(1, inf_instance_type, endpoint_name=endpoint_name)

predictor.content_type = "application/vnd+python.numpy+binary"
data = numpy.zeros(shape=(1, 1, 28, 28))
predictor.predict(data)
56 changes: 56 additions & 0 deletions tests/unit/test_fw_utils.py
@@ -721,6 +721,62 @@ def test_invalid_instance_type():
fw_utils.create_image_uri(MOCK_REGION, MOCK_FRAMEWORK, "p3.2xlarge", "1.0.0", "py3")


def test_valid_inferentia_image():
image_uri = fw_utils.create_image_uri(
REGION,
"neo-tensorflow",
"ml.inf1.2xlarge",
"1.15.0",
py_version="py3",
account=MOCK_ACCOUNT,
)
assert (
image_uri
== "{}.dkr.ecr.{}.amazonaws.com/sagemaker-neo-tensorflow:1.15.0-inf-py3".format(
MOCK_ACCOUNT, REGION
)
)


def test_invalid_inferentia_region():
with pytest.raises(ValueError) as e:
fw_utils.create_image_uri(
"ap-south-1",
"neo-tensorflow",
"ml.inf1.2xlarge",
"1.15.0",
py_version="py3",
account=MOCK_ACCOUNT,
)
assert "Inferentia is not supported in region ap-south-1." in str(e)


def test_inferentia_invalid_framework():
with pytest.raises(ValueError) as e:
fw_utils.create_image_uri(
REGION,
"neo-pytorch",
"ml.inf1.2xlarge",
"1.4.0",
py_version="py3",
account=MOCK_ACCOUNT,
)
assert "Inferentia does not support pytorch." in str(e)


def test_invalid_inferentia_framework_version():
with pytest.raises(ValueError) as e:
fw_utils.create_image_uri(
REGION,
"neo-tensorflow",
"ml.inf1.2xlarge",
"1.15.2",
py_version="py3",
account=MOCK_ACCOUNT,
)
assert "Inferentia is not supported with tensorflow version 1.15.2." in str(e)


@patch(
"sagemaker.fw_utils.get_ecr_image_uri_prefix",
return_value=ECR_PREFIX_FORMAT.format(MOCK_ACCOUNT),
24 changes: 24 additions & 0 deletions tests/unit/test_model.py
@@ -39,6 +39,7 @@
ACCELERATOR_TYPE = "ml.eia.medium"
IMAGE_NAME = "fakeimage"
REGION = "us-west-2"
NEO_REGION_ACCOUNT = "301217895009"
MODEL_NAME = "{}-{}".format(MODEL_IMAGE, TIMESTAMP)
GIT_REPO = "https://github.com/aws/sagemaker-python-sdk.git"
BRANCH = "test-branch-git-config"
@@ -546,6 +547,29 @@ def test_delete_non_deployed_model(sagemaker_session):
model.delete_model()


def test_compile_model_for_inferentia(sagemaker_session, tmpdir):
sagemaker_session.wait_for_compilation_job = Mock(
return_value=DESCRIBE_COMPILATION_JOB_RESPONSE
)
model = DummyFrameworkModel(sagemaker_session, source_dir=str(tmpdir))
model.compile(
target_instance_family="ml_inf",
input_shape={"data": [1, 3, 1024, 1024]},
output_path="s3://output",
role="role",
framework="tensorflow",
framework_version="1.15.0",
job_name="compile-model",
)
assert (
"{}.dkr.ecr.{}.amazonaws.com/sagemaker-neo-tensorflow:1.15.0-inf-py3".format(
NEO_REGION_ACCOUNT, REGION
)
== model.image
)
assert model._is_compiled_model is True


def test_compile_model_for_edge_device(sagemaker_session, tmpdir):
sagemaker_session.wait_for_compilation_job = Mock(
return_value=DESCRIBE_COMPILATION_JOB_RESPONSE