diff --git a/README.rst b/README.rst index 7096b17412..f1eb78c3fd 100644 --- a/README.rst +++ b/README.rst @@ -192,7 +192,7 @@ By using TensorFlow SageMaker Estimators, you can train and host TensorFlow mode Supported versions of TensorFlow: ``1.4.1``, ``1.5.0``, ``1.6.0``, ``1.7.0``, ``1.8.0``, ``1.9.0``, ``1.10.0``, ``1.11.0``, ``1.12.0``, ``1.13.1``, ``1.14``. -Supported versions of TensorFlow for Elastic Inference: ``1.11.0``, ``1.12.0``, ``1.13.1`` +Supported versions of TensorFlow for Elastic Inference: ``1.11.0``, ``1.12.0``, ``1.13.1``, ``1.14``. We recommend that you use the latest supported version, because that's where we focus most of our development efforts. diff --git a/src/sagemaker/fw_utils.py b/src/sagemaker/fw_utils.py index 416127f67c..f2ab6fd4e1 100644 --- a/src/sagemaker/fw_utils.py +++ b/src/sagemaker/fw_utils.py @@ -63,14 +63,16 @@ "tensorflow-scriptmode": "tensorflow-training", "mxnet": "mxnet-training", "tensorflow-serving": "tensorflow-inference", - "mxnet-serving": "mxnet-inference", + "tensorflow-serving-eia": "tensorflow-inference-eia", + "mxnet-serving-eia": "mxnet-inference-eia", } MERGED_FRAMEWORKS_LOWEST_VERSIONS = { "tensorflow-scriptmode": [1, 13, 1], "mxnet": [1, 4, 1], "tensorflow-serving": [1, 13, 0], - "mxnet-serving": [1, 4, 1], + "tensorflow-serving-eia": [1, 14, 0], + "mxnet-serving-eia": [1, 4, 1], } @@ -101,7 +103,7 @@ def _is_merged_versions(framework, framework_version): return False -def _using_merged_images(region, framework, py_version, accelerator_type, framework_version): +def _using_merged_images(region, framework, py_version, framework_version): """ Args: region: @@ -116,8 +118,11 @@ def _using_merged_images(region, framework, py_version, accelerator_type, framew return ( (not is_gov_region) and is_merged_versions - and (is_py3 or _is_tf_14_or_later(framework, framework_version)) - and accelerator_type is None + and ( + is_py3 + or _is_tf_14_or_later(framework, framework_version) + or _is_mxnet_serving_141_or_later(framework, framework_version) + ) ) @@ -135,7 +140,25 @@ def _is_tf_14_or_later(framework, framework_version): ) -def _registry_id(region, framework, py_version, account, accelerator_type, framework_version): +def _is_mxnet_serving_141_or_later(framework, framework_version): + """ + Args: + framework: + framework_version: + """ + asimov_lowest_mxnet = [1, 4, 1] + + version = [int(s) for s in framework_version.split(".")] + + if len(version) == 2: + version.append(0) + + return ( + framework.startswith("mxnet-serving") and version >= asimov_lowest_mxnet[0 : len(version)] + ) + + +def _registry_id(region, framework, py_version, account, framework_version): """ Args: region: @@ -145,7 +168,7 @@ def _registry_id(region, framework, py_version, account, accelerator_type, frame accelerator_type: framework_version: """ - if _using_merged_images(region, framework, py_version, accelerator_type, framework_version): + if _using_merged_images(region, framework, py_version, framework_version): if region in ASIMOV_OPT_IN_ACCOUNTS_BY_REGION: return ASIMOV_OPT_IN_ACCOUNTS_BY_REGION.get(region) return "763104351884" @@ -187,13 +210,19 @@ def create_image_uri( if py_version and py_version not in VALID_PY_VERSIONS: raise ValueError("invalid py_version argument: {}".format(py_version)) + if _accelerator_type_valid_for_framework( + framework=framework, + accelerator_type=accelerator_type, + optimized_families=optimized_families, + ): + framework += "-eia" + # Handle Account Number for Gov Cloud and frameworks with DLC merged images account = _registry_id( region=region, framework=framework, py_version=py_version, account=account, - accelerator_type=accelerator_type, framework_version=framework_version, ) @@ -218,19 +247,14 @@ def create_image_uri( else: device_type = "cpu" - if py_version: - tag = "{}-{}-{}".format(framework_version, device_type, py_version) - else: - tag = "{}-{}".format(framework_version, device_type) + using_merged_images = _using_merged_images(region, framework, py_version, framework_version) - if _accelerator_type_valid_for_framework( - framework=framework, - accelerator_type=accelerator_type, - optimized_families=optimized_families, - ): - framework += "-eia" + if not py_version or (using_merged_images and framework == "tensorflow-serving-eia"): + tag = "{}-{}".format(framework_version, device_type) + else: + tag = "{}-{}-{}".format(framework_version, device_type, py_version) - if _using_merged_images(region, framework, py_version, accelerator_type, framework_version): + if using_merged_images: return "{}/{}:{}".format( get_ecr_image_uri_prefix(account, region), MERGED_FRAMEWORKS_REPO_MAP[framework], tag ) diff --git a/src/sagemaker/tensorflow/serving.py b/src/sagemaker/tensorflow/serving.py index 30a7203c85..db012740cd 100644 --- a/src/sagemaker/tensorflow/serving.py +++ b/src/sagemaker/tensorflow/serving.py @@ -131,7 +131,7 @@ class Model(sagemaker.model.FrameworkModel): logging.ERROR: "error", logging.CRITICAL: "crit", } - LATEST_EIA_VERSION = [1, 13] + LATEST_EIA_VERSION = [1, 14] def __init__( self, diff --git a/tests/integ/test_tfs.py b/tests/integ/test_tfs.py index 3c4ae76ef9..8bb5654c14 100644 --- a/tests/integ/test_tfs.py +++ b/tests/integ/test_tfs.py @@ -121,7 +121,7 @@ def tfs_predictor_with_accelerator(sagemaker_session, tf_full_version, cpu_insta model = Model( model_data=model_data, role="SageMakerRole", - framework_version="1.13", + framework_version="1.14", sagemaker_session=sagemaker_session, ) predictor = model.deploy( diff --git a/tests/unit/test_fw_utils.py b/tests/unit/test_fw_utils.py index 359e38866f..53fae0ae0b 100644 --- a/tests/unit/test_fw_utils.py +++ b/tests/unit/test_fw_utils.py @@ -146,6 +146,49 @@ def test_create_image_uri_hkg(): } +def test_tf_eia_images(): + image_uri = fw_utils.create_image_uri( + "us-west-2", + "tensorflow-serving", + "ml.p3.2xlarge", + "1.14.0", + "py3", + accelerator_type="ml.eia1.medium", + ) + assert ( + image_uri + == "763104351884.dkr.ecr.us-west-2.amazonaws.com/tensorflow-inference-eia:1.14.0-gpu" + ) + + +def test_mxnet_eia_images(): + image_uri = fw_utils.create_image_uri( + "us-west-2", + "mxnet-serving", + "ml.p3.2xlarge", + "1.4.1", + "py2", + accelerator_type="ml.eia1.medium", + ) + assert ( + image_uri + == "763104351884.dkr.ecr.us-west-2.amazonaws.com/mxnet-inference-eia:1.4.1-gpu-py2" + ) + + image_uri = fw_utils.create_image_uri( + "us-east-1", + "mxnet-serving", + "ml.c4.2xlarge", + "1.4.1", + "py3", + accelerator_type="ml.eia1.large", + ) + assert ( + image_uri + == "763104351884.dkr.ecr.us-east-1.amazonaws.com/mxnet-inference-eia:1.4.1-cpu-py3" + ) + + def test_create_image_uri_merged(): image_uri = fw_utils.create_image_uri( "us-west-2", "tensorflow-scriptmode", "ml.p3.2xlarge", "1.14", "py3" @@ -175,7 +218,23 @@ def test_create_image_uri_merged(): image_uri = fw_utils.create_image_uri( "us-west-2", "mxnet-serving", "ml.c4.2xlarge", "1.4.1", "py3" ) - assert image_uri == "763104351884.dkr.ecr.us-west-2.amazonaws.com/mxnet-inference:1.4.1-cpu-py3" + assert ( + image_uri + == "520713654638.dkr.ecr.us-west-2.amazonaws.com/sagemaker-mxnet-serving:1.4.1-cpu-py3" + ) + + image_uri = fw_utils.create_image_uri( + "us-west-2", + "mxnet-serving", + "ml.c4.2xlarge", + "1.4.1", + "py3", + accelerator_type="ml.eia1.medium", + ) + assert ( + image_uri + == "763104351884.dkr.ecr.us-west-2.amazonaws.com/mxnet-inference-eia:1.4.1-cpu-py3" + ) def test_create_image_uri_merged_py2(): @@ -198,11 +257,11 @@ def test_create_image_uri_merged_py2(): assert image_uri == "520713654638.dkr.ecr.us-west-2.amazonaws.com/sagemaker-mxnet:1.4.1-gpu-py2" image_uri = fw_utils.create_image_uri( - "us-west-2", "mxnet-serving", "ml.c4.2xlarge", "1.4.1", "py2" + "us-west-2", "mxnet-serving", "ml.c4.2xlarge", "1.3.1", "py2" ) assert ( image_uri - == "520713654638.dkr.ecr.us-west-2.amazonaws.com/sagemaker-mxnet-serving:1.4.1-cpu-py2" + == "520713654638.dkr.ecr.us-west-2.amazonaws.com/sagemaker-mxnet-serving:1.3.1-cpu-py2" ) diff --git a/tests/unit/test_mxnet.py b/tests/unit/test_mxnet.py index ce6c5f0760..55581dc649 100644 --- a/tests/unit/test_mxnet.py +++ b/tests/unit/test_mxnet.py @@ -337,6 +337,7 @@ def test_mxnet_mms_version( model = mx.create_model() expected_image_base = _get_full_image_uri(mxnet_version, IMAGE_REPO_SERVING_NAME, "gpu") + environment = { "Environment": { "SAGEMAKER_SUBMIT_DIRECTORY": "s3://mybucket/sagemaker-mxnet-2017-11-06-14:14:15.672/model.tar.gz", diff --git a/tests/unit/test_tfs.py b/tests/unit/test_tfs.py index 2b528aecb9..4c21581d5a 100644 --- a/tests/unit/test_tfs.py +++ b/tests/unit/test_tfs.py @@ -113,7 +113,7 @@ def test_tfs_model_image_accelerator_not_supported(sagemaker_session): model = Model( "s3://some/data.tar.gz", role=ROLE, - framework_version="1.14", + framework_version="1.15", sagemaker_session=sagemaker_session, ) @@ -128,7 +128,7 @@ def test_tfs_model_image_accelerator_not_supported(sagemaker_session): initial_instance_count=1, ) - assert str(e.value) == "The TensorFlow version 1.14 doesn't support EIA." + assert str(e.value) == "The TensorFlow version 1.15 doesn't support EIA." def test_tfs_model_with_log_level(sagemaker_session, tf_version):