From 99279ffb73183e84d320f10324557d985f637563 Mon Sep 17 00:00:00 2001 From: Eric Johnson <65414824+metrizable@users.noreply.github.com> Date: Thu, 11 Jun 2020 15:24:56 -0700 Subject: [PATCH 1/5] breaking: require framework_version, py_version for tensorflow --- .../tensorflow/upgrade_from_legacy.rst | 1 + doc/frameworks/tensorflow/using_tf.rst | 5 +- src/sagemaker/rl/estimator.py | 4 +- src/sagemaker/tensorflow/estimator.py | 30 ++++----- src/sagemaker/tensorflow/model.py | 22 +++++-- tests/integ/test_data_capture_config.py | 15 ++++- tests/integ/test_model_monitor.py | 8 ++- tests/integ/test_tf.py | 4 +- tests/integ/test_tf_efs_fsx.py | 6 +- tests/integ/test_tfs.py | 15 ++++- tests/integ/test_tuner.py | 10 ++- tests/unit/__init__.py | 2 + .../sagemaker/tensorflow/test_estimator.py | 64 ++++++++++++------- .../tensorflow/test_estimator_init.py | 41 ++++++------ tests/unit/sagemaker/tensorflow/test_tfs.py | 6 ++ 15 files changed, 151 insertions(+), 82 deletions(-) diff --git a/doc/frameworks/tensorflow/upgrade_from_legacy.rst b/doc/frameworks/tensorflow/upgrade_from_legacy.rst index 7805c9579b..a0da2413e6 100644 --- a/doc/frameworks/tensorflow/upgrade_from_legacy.rst +++ b/doc/frameworks/tensorflow/upgrade_from_legacy.rst @@ -104,6 +104,7 @@ the difference in code would be as follows: ... source_dir="code", framework_version="1.10.0", + py_version="py3", train_instance_type="ml.m4.xlarge", image_name="520713654638.dkr.ecr.us-west-2.amazonaws.com/sagemaker-tensorflow:1.10.0-cpu-py2", hyperparameters={ diff --git a/doc/frameworks/tensorflow/using_tf.rst b/doc/frameworks/tensorflow/using_tf.rst index ae3a56a183..651b3562c5 100644 --- a/doc/frameworks/tensorflow/using_tf.rst +++ b/doc/frameworks/tensorflow/using_tf.rst @@ -319,7 +319,7 @@ To run training job with Pipe input mode, pass in ``input_mode='Pipe'`` to your tf_estimator = TensorFlow(entry_point='tf-train-with-pipemodedataset.py', role='SageMakerRole', training_steps=10000, evaluation_steps=100, train_instance_count=1, train_instance_type='ml.p2.xlarge', - framework_version='1.10.0', input_mode='Pipe') + framework_version='1.10.0', py_version='py3', input_mode='Pipe') tf_estimator.fit('s3://bucket/path/to/training/data') @@ -383,7 +383,8 @@ estimator object to create a SageMaker Endpoint: from sagemaker.tensorflow import TensorFlow estimator = TensorFlow(entry_point='tf-train.py', ..., train_instance_count=1, - train_instance_type='ml.c4.xlarge', framework_version='1.11') + train_instance_type='ml.c4.xlarge', framework_version='1.11', + py_version='py3') estimator.fit(inputs) diff --git a/src/sagemaker/rl/estimator.py b/src/sagemaker/rl/estimator.py index a44992cd8a..d41a0d6484 100644 --- a/src/sagemaker/rl/estimator.py +++ b/src/sagemaker/rl/estimator.py @@ -254,7 +254,9 @@ def create_model( ) if self.framework == RLFramework.TENSORFLOW.value: - return TensorFlowModel(framework_version=self.framework_version, **base_args) + return TensorFlowModel( + framework_version=self.framework_version, py_version=PYTHON_VERSION, **base_args + ) if self.framework == RLFramework.MXNET.value: return MXNetModel( framework_version=self.framework_version, py_version=PYTHON_VERSION, **extended_args diff --git a/src/sagemaker/tensorflow/estimator.py b/src/sagemaker/tensorflow/estimator.py index 70c120c369..7324fdc3fd 100644 --- a/src/sagemaker/tensorflow/estimator.py +++ b/src/sagemaker/tensorflow/estimator.py @@ -56,11 +56,12 @@ def __init__( Args: py_version (str): Python version you want to use for executing your model training - code (default: 'py2'). + code. One of 'py2', 'py3', or 'py37'. Defaults to ``None``. Required unless + ``image_name`` is provided. framework_version (str): TensorFlow version you want to use for executing your model - training code. List of supported versions + training code. Defaults to ``None``. Required unless ``image_name`` is provided. + List of supported versions: https://github.com/aws/sagemaker-python-sdk#tensorflow-sagemaker-estimators. - If not specified, this will default to 1.11. model_dir (str): S3 location where the checkpoint data and models can be exported to during training (default: None). It will be passed in the training script as one of the command line arguments. If not specified, one is provided based on @@ -81,6 +82,10 @@ def __init__( Examples: 123.dkr.ecr.us-west-2.amazonaws.com/my-custom-image:1.0 custom-image:latest. + + If ``framework_version`` or ``py_version`` are ``None``, then + ``image_name`` is required. If also ``None``, then a ``ValueError`` + will be raised. distributions (dict): A dictionary with information on how to run distributed training (default: None). Currently we support distributed training with parameter servers and MPI. @@ -114,18 +119,13 @@ def __init__( :class:`~sagemaker.estimator.Framework` and :class:`~sagemaker.estimator.EstimatorBase`. """ - if framework_version is None: - logger.warning( - fw.empty_framework_version_warning(defaults.TF_VERSION, self.LATEST_VERSION) - ) - self.framework_version = framework_version or defaults.TF_VERSION - - if not py_version: - py_version = "py3" if self._only_python_3_supported() else "py2" + fw.validate_version_or_image_args(framework_version, py_version, image_name) if py_version == "py2": logger.warning( fw.python_deprecation_warning(self.__framework_name__, defaults.LATEST_PY2_VERSION) ) + self.framework_version = framework_version + self.py_version = py_version if distributions is not None: logger.warning(fw.parameter_v2_rename_warning("distribution", distributions)) @@ -136,12 +136,11 @@ def __init__( if "enable_sagemaker_metrics" not in kwargs: # enable sagemaker metrics for TF v1.15 or greater: - if fw.is_version_equal_or_higher([1, 15], self.framework_version): + if framework_version and fw.is_version_equal_or_higher([1, 15], framework_version): kwargs["enable_sagemaker_metrics"] = True super(TensorFlow, self).__init__(image_name=image_name, **kwargs) - self.py_version = py_version self.model_dir = model_dir self.distributions = distributions or {} @@ -150,7 +149,7 @@ def __init__( def _validate_args(self, py_version, framework_version): """Placeholder docstring""" - if py_version == "py3": + if py_version: if framework_version is None: raise AttributeError(fw.EMPTY_FRAMEWORK_VERSION_ERROR) @@ -161,7 +160,7 @@ def _validate_args(self, py_version, framework_version): ) raise AttributeError(msg) - if self._only_legacy_mode_supported() and self.image_name is None: + if self.image_name is None and self._only_legacy_mode_supported(): legacy_image_uri = fw.create_image_uri( self.sagemaker_session.boto_region_name, "tensorflow", @@ -294,6 +293,7 @@ def create_model( role=role or self.role, container_log_level=self.container_log_level, framework_version=self.framework_version, + py_version=self.py_version, sagemaker_session=self.sagemaker_session, vpc_config=self.get_vpc_config(vpc_config_override), entry_point=entry_point, diff --git a/src/sagemaker/tensorflow/model.py b/src/sagemaker/tensorflow/model.py index 4446dd5d1b..15c6ccae25 100644 --- a/src/sagemaker/tensorflow/model.py +++ b/src/sagemaker/tensorflow/model.py @@ -17,9 +17,8 @@ import sagemaker from sagemaker.content_types import CONTENT_TYPE_JSON -from sagemaker.fw_utils import create_image_uri +from sagemaker.fw_utils import create_image_uri, validate_version_or_image_args from sagemaker.predictor import json_serializer, json_deserializer -from sagemaker.tensorflow.defaults import TF_VERSION class TensorFlowPredictor(sagemaker.RealTimePredictor): @@ -138,7 +137,8 @@ def __init__( role, entry_point=None, image=None, - framework_version=TF_VERSION, + framework_version=None, + py_version=None, container_log_level=None, predictor_cls=TensorFlowPredictor, **kwargs @@ -158,9 +158,16 @@ def __init__( hosting. If ``source_dir`` is specified, then ``entry_point`` must point to a file located at the root of ``source_dir``. image (str): A Docker image URI (default: None). If not specified, a - default image for TensorFlow Serving will be used. + default image for TensorFlow Serving will be used. If + ``framework_version`` or ``py_version`` are ``None``, then + ``image`` is required. If also ``None``, then a ``ValueError`` + will be raised. framework_version (str): Optional. TensorFlow Serving version you - want to use. + want to use. Defaults to ``None``. Required unless ``image`` is + provided. + py_version (str): Python version you want to use for executing your + model training code. One of 'py2', 'py3', or 'py37'. Defaults to + ``None``. Required unless ``image`` is provided. container_log_level (int): Log level to use within the container (default: logging.ERROR). Valid values are defined in the Python logging module. @@ -176,6 +183,10 @@ def __init__( :class:`~sagemaker.model.FrameworkModel` and :class:`~sagemaker.model.Model`. """ + validate_version_or_image_args(framework_version, py_version, image) + self.framework_version = framework_version + self.py_version = py_version + super(TensorFlowModel, self).__init__( model_data=model_data, role=role, @@ -184,7 +195,6 @@ def __init__( entry_point=entry_point, **kwargs ) - self.framework_version = framework_version self._container_log_level = container_log_level def deploy( diff --git a/tests/integ/test_data_capture_config.py b/tests/integ/test_data_capture_config.py index b4ad979fcf..f0fb20cc74 100644 --- a/tests/integ/test_data_capture_config.py +++ b/tests/integ/test_data_capture_config.py @@ -13,6 +13,7 @@ from __future__ import absolute_import import os +import pytest import sagemaker import tests.integ @@ -40,8 +41,13 @@ CUSTOM_JSON_CONTENT_TYPES = ["application/jsontype1", "application/jsontype2"] +@pytest.fixture(scope="module") +def py_version(tf_full_version, tf_serving_version): + return "py37" if tf_full_version == tf_serving_version else tests.integ.PYTHON_VERSION + + def test_enabling_data_capture_on_endpoint_shows_correct_data_capture_status( - sagemaker_session, tf_serving_version + sagemaker_session, tf_serving_version, py_version ): endpoint_name = unique_name_from_base("sagemaker-tensorflow-serving") model_data = sagemaker_session.upload_data( @@ -53,6 +59,7 @@ def test_enabling_data_capture_on_endpoint_shows_correct_data_capture_status( model_data=model_data, role=ROLE, framework_version=tf_serving_version, + py_version=py_version, sagemaker_session=sagemaker_session, ) predictor = model.deploy( @@ -98,7 +105,7 @@ def test_enabling_data_capture_on_endpoint_shows_correct_data_capture_status( def test_disabling_data_capture_on_endpoint_shows_correct_data_capture_status( - sagemaker_session, tf_serving_version + sagemaker_session, tf_serving_version, py_version ): endpoint_name = unique_name_from_base("sagemaker-tensorflow-serving") model_data = sagemaker_session.upload_data( @@ -110,6 +117,7 @@ def test_disabling_data_capture_on_endpoint_shows_correct_data_capture_status( model_data=model_data, role=ROLE, framework_version=tf_serving_version, + py_version=py_version, sagemaker_session=sagemaker_session, ) destination_s3_uri = os.path.join( @@ -184,7 +192,7 @@ def test_disabling_data_capture_on_endpoint_shows_correct_data_capture_status( def test_updating_data_capture_on_endpoint_shows_correct_data_capture_status( - sagemaker_session, tf_serving_version + sagemaker_session, tf_serving_version, py_version ): endpoint_name = sagemaker.utils.unique_name_from_base("sagemaker-tensorflow-serving") model_data = sagemaker_session.upload_data( @@ -196,6 +204,7 @@ def test_updating_data_capture_on_endpoint_shows_correct_data_capture_status( model_data=model_data, role=ROLE, framework_version=tf_serving_version, + py_version=py_version, sagemaker_session=sagemaker_session, ) destination_s3_uri = os.path.join( diff --git a/tests/integ/test_model_monitor.py b/tests/integ/test_model_monitor.py index c876b696b9..012bc86d8d 100644 --- a/tests/integ/test_model_monitor.py +++ b/tests/integ/test_model_monitor.py @@ -88,7 +88,12 @@ @pytest.fixture(scope="module") -def predictor(sagemaker_session, tf_serving_version): +def py_version(tf_full_version, tf_serving_version): + return "py37" if tf_full_version == tf_serving_version else tests.integ.PYTHON_VERSION + + +@pytest.fixture(scope="module") +def predictor(sagemaker_session, tf_serving_version, py_version): endpoint_name = unique_name_from_base("sagemaker-tensorflow-serving") model_data = sagemaker_session.upload_data( path=os.path.join(tests.integ.DATA_DIR, "tensorflow-serving-test-model.tar.gz"), @@ -101,6 +106,7 @@ def predictor(sagemaker_session, tf_serving_version): model_data=model_data, role=ROLE, framework_version=tf_serving_version, + py_version=py_version, sagemaker_session=sagemaker_session, ) predictor = model.deploy( diff --git a/tests/integ/test_tf.py b/tests/integ/test_tf.py index e3e030f799..08a6a66eed 100644 --- a/tests/integ/test_tf.py +++ b/tests/integ/test_tf.py @@ -59,7 +59,7 @@ def test_mnist_with_checkpoint_config( train_instance_type=instance_type, sagemaker_session=sagemaker_session, framework_version=tf_full_version, - py_version="py37", + py_version=py_version, metric_definitions=[{"Name": "train:global_steps", "Regex": r"global_step\/sec:\s(.*)"}], checkpoint_s3_uri=checkpoint_s3_uri, checkpoint_local_path=checkpoint_local_path, @@ -137,8 +137,8 @@ def test_mnist_distributed(sagemaker_session, instance_type, tf_full_version, py train_instance_count=2, train_instance_type=instance_type, sagemaker_session=sagemaker_session, - py_version="py37", framework_version=tf_full_version, + py_version=py_version, distributions=PARAMETER_SERVER_DISTRIBUTION, ) inputs = estimator.sagemaker_session.upload_data( diff --git a/tests/integ/test_tf_efs_fsx.py b/tests/integ/test_tf_efs_fsx.py index fb085cfe1f..d805930023 100644 --- a/tests/integ/test_tf_efs_fsx.py +++ b/tests/integ/test_tf_efs_fsx.py @@ -36,7 +36,7 @@ FSX_DIR_PATH = "/fsx/tensorflow" MAX_JOBS = 2 MAX_PARALLEL_JOBS = 2 -PY_VERSION = "py3" +PY_VERSION = "py37" @pytest.fixture(scope="module") @@ -139,8 +139,8 @@ def test_tuning_tf_efs(efs_fsx_setup, sagemaker_session, cpu_instance_type): train_instance_count=1, train_instance_type=cpu_instance_type, sagemaker_session=sagemaker_session, - py_version=PY_VERSION, framework_version=TensorFlow.LATEST_VERSION, + py_version=PY_VERSION, subnets=subnets, security_group_ids=security_group_ids, ) @@ -186,8 +186,8 @@ def test_tuning_tf_lustre(efs_fsx_setup, sagemaker_session, cpu_instance_type): train_instance_count=1, train_instance_type=cpu_instance_type, sagemaker_session=sagemaker_session, - py_version=PY_VERSION, framework_version=TensorFlow.LATEST_VERSION, + py_version=PY_VERSION, subnets=subnets, security_group_ids=security_group_ids, ) diff --git a/tests/integ/test_tfs.py b/tests/integ/test_tfs.py index f009e3063d..4e035e0ead 100644 --- a/tests/integ/test_tfs.py +++ b/tests/integ/test_tfs.py @@ -27,7 +27,12 @@ @pytest.fixture(scope="module") -def tfs_predictor(sagemaker_session, tf_serving_version): +def py_version(tf_full_version, tf_serving_version): + return "py37" if tf_full_version == tf_serving_version else tests.integ.PYTHON_VERSION + + +@pytest.fixture(scope="module") +def tfs_predictor(sagemaker_session, tf_serving_version, py_version): endpoint_name = sagemaker.utils.unique_name_from_base("sagemaker-tensorflow-serving") model_data = sagemaker_session.upload_data( path=os.path.join(tests.integ.DATA_DIR, "tensorflow-serving-test-model.tar.gz"), @@ -38,6 +43,7 @@ def tfs_predictor(sagemaker_session, tf_serving_version): model_data=model_data, role="SageMakerRole", framework_version=tf_serving_version, + py_version=py_version, sagemaker_session=sagemaker_session, ) predictor = model.deploy(1, "ml.c5.xlarge", endpoint_name=endpoint_name) @@ -54,7 +60,7 @@ def tar_dir(directory, tmpdir): @pytest.fixture def tfs_predictor_with_model_and_entry_point_same_tar( - sagemaker_local_session, tf_serving_version, tmpdir + sagemaker_local_session, tf_serving_version, py_version, tmpdir ): endpoint_name = sagemaker.utils.unique_name_from_base("sagemaker-tensorflow-serving") @@ -66,6 +72,7 @@ def tfs_predictor_with_model_and_entry_point_same_tar( model_data="file://" + model_tar, role="SageMakerRole", framework_version=tf_serving_version, + py_version=py_version, sagemaker_session=sagemaker_local_session, ) predictor = model.deploy(1, "local", endpoint_name=endpoint_name) @@ -78,7 +85,7 @@ def tfs_predictor_with_model_and_entry_point_same_tar( @pytest.fixture(scope="module") def tfs_predictor_with_model_and_entry_point_and_dependencies( - sagemaker_local_session, tf_serving_version + sagemaker_local_session, tf_serving_version, py_version ): endpoint_name = sagemaker.utils.unique_name_from_base("sagemaker-tensorflow-serving") @@ -99,6 +106,7 @@ def tfs_predictor_with_model_and_entry_point_and_dependencies( role="SageMakerRole", dependencies=dependencies, framework_version=tf_serving_version, + py_version=py_version, sagemaker_session=sagemaker_local_session, ) @@ -122,6 +130,7 @@ def tfs_predictor_with_accelerator(sagemaker_session, ei_tf_full_version, cpu_in model_data=model_data, role="SageMakerRole", framework_version=ei_tf_full_version, + py_version=tests.integ.PYTHON_VERSION, sagemaker_session=sagemaker_session, ) predictor = model.deploy( diff --git a/tests/integ/test_tuner.py b/tests/integ/test_tuner.py index 9d8198134f..cce53741f8 100644 --- a/tests/integ/test_tuner.py +++ b/tests/integ/test_tuner.py @@ -608,8 +608,8 @@ def test_tuning_tf_script_mode(sagemaker_session, cpu_instance_type, tf_full_ver train_instance_count=1, train_instance_type=cpu_instance_type, sagemaker_session=sagemaker_session, - py_version=py_version, framework_version=tf_full_version, + py_version=py_version, ) hyperparameter_ranges = {"epochs": IntegerParameter(1, 2)} @@ -641,7 +641,7 @@ def test_tuning_tf_script_mode(sagemaker_session, cpu_instance_type, tf_full_ver @pytest.mark.canary_quick @pytest.mark.skipif(PYTHON_VERSION != "py2", reason="TensorFlow image supports only python 2.") -def test_tuning_tf(sagemaker_session, cpu_instance_type): +def test_tuning_tf(sagemaker_session, cpu_instance_type, tf_full_version, py_version): with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES): script_path = os.path.join(DATA_DIR, "iris", "iris-dnn-classifier.py") @@ -654,6 +654,8 @@ def test_tuning_tf(sagemaker_session, cpu_instance_type): train_instance_count=1, train_instance_type=cpu_instance_type, sagemaker_session=sagemaker_session, + framework_version=tf_full_version, + py_version=py_version, ) inputs = sagemaker_session.upload_data(path=DATA_PATH, key_prefix="integ-test-data/tf_iris") @@ -694,7 +696,7 @@ def test_tuning_tf(sagemaker_session, cpu_instance_type): @pytest.mark.skipif(PYTHON_VERSION != "py2", reason="TensorFlow image supports only python 2.") -def test_tuning_tf_vpc_multi(sagemaker_session, cpu_instance_type): +def test_tuning_tf_vpc_multi(sagemaker_session, cpu_instance_type, tf_full_version, py_version): """Test Tensorflow multi-instance using the same VpcConfig for training and inference""" instance_type = cpu_instance_type instance_count = 2 @@ -718,6 +720,8 @@ def test_tuning_tf_vpc_multi(sagemaker_session, cpu_instance_type): subnets=subnet_ids, security_group_ids=[security_group_id], encrypt_inter_container_traffic=True, + framework_version=tf_full_version, + py_version=py_version, ) inputs = sagemaker_session.upload_data(path=DATA_PATH, key_prefix="integ-test-data/tf_iris") diff --git a/tests/unit/__init__.py b/tests/unit/__init__.py index a94934a1bc..3c91d0b5a1 100644 --- a/tests/unit/__init__.py +++ b/tests/unit/__init__.py @@ -39,3 +39,5 @@ "cn-northwest-1", "us-gov-west-1", ] + +PY_VERSION = "py3" diff --git a/tests/unit/sagemaker/tensorflow/test_estimator.py b/tests/unit/sagemaker/tensorflow/test_estimator.py index 076c849a76..28ff7d9f5a 100644 --- a/tests/unit/sagemaker/tensorflow/test_estimator.py +++ b/tests/unit/sagemaker/tensorflow/test_estimator.py @@ -21,8 +21,9 @@ import pytest from sagemaker.estimator import _TrainingJob -from sagemaker.tensorflow import defaults, model, TensorFlow -from tests.unit import DATA_DIR +from sagemaker.tensorflow import model, TensorFlow +from sagemaker.tensorflow.defaults import TF_VERSION +from tests.unit import DATA_DIR, PY_VERSION SCRIPT_FILE = "dummy_script.py" SCRIPT_PATH = os.path.join(DATA_DIR, SCRIPT_FILE) @@ -144,7 +145,8 @@ def _create_train_job(tf_version, horovod=False, ps=False, py_version="py2"): def _build_tf( sagemaker_session, - framework_version=defaults.TF_VERSION, + framework_version=TF_VERSION, + py_version=PY_VERSION, train_instance_type=None, base_job_name=None, **kwargs @@ -152,6 +154,7 @@ def _build_tf( return TensorFlow( entry_point=SCRIPT_PATH, framework_version=framework_version, + py_version=py_version, role=ROLE, sagemaker_session=sagemaker_session, train_instance_count=INSTANCE_COUNT, @@ -172,11 +175,12 @@ def test_create_model(sagemaker_session, tf_version): source_dir = "s3://mybucket/source" tf = TensorFlow( entry_point=SCRIPT_PATH, + framework_version=tf_version, + py_version=PY_VERSION, role=ROLE, sagemaker_session=sagemaker_session, train_instance_count=INSTANCE_COUNT, train_instance_type=INSTANCE_TYPE, - framework_version=tf_version, container_log_level=container_log_level, base_job_name="job", source_dir=source_dir, @@ -198,12 +202,20 @@ def test_create_model(sagemaker_session, tf_version): assert model.enable_network_isolation() -def test_create_model_with_optional_params(sagemaker_session): +def test_create_model_with_optional_params(sagemaker_session, tf_version): + if version.Version(tf_version) < version.Version("1.11"): + pytest.skip( + "Legacy TF version requires explicit image URI, and " + "this logic is tested in test_create_model_with_custom_image." + ) + container_log_level = '"logging.INFO"' source_dir = "s3://mybucket/source" enable_cloudwatch_metrics = "true" tf = TensorFlow( entry_point=SCRIPT_PATH, + framework_version=tf_version, + py_version=PY_VERSION, role=ROLE, sagemaker_session=sagemaker_session, train_instance_count=INSTANCE_COUNT, @@ -259,12 +271,20 @@ def test_create_model_with_custom_image(sagemaker_session): @patch("sagemaker.tensorflow.estimator.TensorFlow.create_model") -def test_transformer_creation_with_optional_args(create_model, sagemaker_session): +def test_transformer_creation_with_optional_args(create_model, sagemaker_session, tf_version): + if version.Version(tf_version) < version.Version("1.11"): + pytest.skip( + "Legacy TF version requires explicit image URI, and " + "this logic is tested in test_create_model_with_custom_image." + ) + model = Mock() create_model.return_value = model tf = TensorFlow( entry_point=SCRIPT_PATH, + framework_version=tf_version, + py_version=PY_VERSION, role=ROLE, sagemaker_session=sagemaker_session, train_instance_count=INSTANCE_COUNT, @@ -329,12 +349,20 @@ def test_transformer_creation_with_optional_args(create_model, sagemaker_session @patch("sagemaker.tensorflow.estimator.TensorFlow.create_model") -def test_transformer_creation_without_optional_args(create_model, sagemaker_session): +def test_transformer_creation_without_optional_args(create_model, sagemaker_session, tf_version): + if version.Version(tf_version) < version.Version("1.11"): + pytest.skip( + "Legacy TF version requires explicit image URI, and " + "this logic is tested in test_create_model_with_custom_image." + ) + model = Mock() create_model.return_value = model tf = TensorFlow( entry_point=SCRIPT_PATH, + framework_version=tf_version, + py_version=PY_VERSION, role=ROLE, sagemaker_session=sagemaker_session, train_instance_count=INSTANCE_COUNT, @@ -367,9 +395,7 @@ def test_transformer_creation_without_optional_args(create_model, sagemaker_sess def test_script_mode_create_model(sagemaker_session): - tf = _build_tf( - sagemaker_session=sagemaker_session, py_version="py3", enable_network_isolation=True - ) + tf = _build_tf(sagemaker_session=sagemaker_session, enable_network_isolation=True) tf._prepare_for_training() # set output_path and job name as if training happened tf_model = tf.create_model() @@ -391,12 +417,12 @@ def test_script_mode_create_model(sagemaker_session): def test_fit(time, strftime, sagemaker_session): tf = TensorFlow( entry_point=SCRIPT_FILE, + framework_version=TF_VERSION, + py_version=PY_VERSION, role=ROLE, sagemaker_session=sagemaker_session, - py_version="py3", train_instance_type=INSTANCE_TYPE, train_instance_count=1, - framework_version="1.11", source_dir=DATA_DIR, ) @@ -419,12 +445,12 @@ def test_fit(time, strftime, sagemaker_session): def test_fit_ps(time, strftime, sagemaker_session): tf = TensorFlow( entry_point=SCRIPT_FILE, + framework_version=TF_VERSION, + py_version=PY_VERSION, role=ROLE, sagemaker_session=sagemaker_session, - py_version="py3", train_instance_type=INSTANCE_TYPE, train_instance_count=1, - framework_version="1.11", source_dir=DATA_DIR, distributions=DISTRIBUTION_ENABLED, ) @@ -449,12 +475,12 @@ def test_fit_ps(time, strftime, sagemaker_session): def test_fit_mpi(time, strftime, sagemaker_session): tf = TensorFlow( entry_point=SCRIPT_FILE, + framework_version=TF_VERSION, + py_version=PY_VERSION, role=ROLE, sagemaker_session=sagemaker_session, - py_version="py3", train_instance_type=INSTANCE_TYPE, train_instance_count=1, - framework_version="1.11", source_dir=DATA_DIR, distributions=DISTRIBUTION_MPI_ENABLED, ) @@ -483,12 +509,6 @@ def test_hyperparameters_no_model_dir(sagemaker_session): assert "model_dir" not in hyperparameters -def test_train_image_default(sagemaker_session): - tf = _build_tf(sagemaker_session) - expected_image = _image_uri(defaults.TF_VERSION, "py2") - assert expected_image == tf.train_image() - - def test_train_image_custom_image(sagemaker_session): custom_image = "tensorflow:latest" tf = _build_tf(sagemaker_session, image_name=custom_image) diff --git a/tests/unit/sagemaker/tensorflow/test_estimator_init.py b/tests/unit/sagemaker/tensorflow/test_estimator_init.py index 374e9e2fe4..4e9e6aea79 100644 --- a/tests/unit/sagemaker/tensorflow/test_estimator_init.py +++ b/tests/unit/sagemaker/tensorflow/test_estimator_init.py @@ -16,7 +16,7 @@ from packaging import version import pytest -from sagemaker.tensorflow import defaults, TensorFlow +from sagemaker.tensorflow import TensorFlow REGION = "us-west-2" @@ -37,17 +37,9 @@ def _build_tf(sagemaker_session, **kwargs): ) -@patch("sagemaker.fw_utils.empty_framework_version_warning") -def test_empty_framework_version(warning, sagemaker_session): - estimator = _build_tf(sagemaker_session, framework_version=None) - - assert estimator.framework_version == defaults.TF_VERSION - warning.assert_called_with(defaults.TF_VERSION, estimator.LATEST_VERSION) - - @patch("sagemaker.fw_utils.python_deprecation_warning") def test_estimator_py2_deprecation_warning(warning, sagemaker_session): - estimator = _build_tf(sagemaker_session, py_version="py2") + estimator = _build_tf(sagemaker_session, framework_version="2.1.0", py_version="py2") assert estimator.py_version == "py2" warning.assert_called_with("tensorflow", "2.1.0") @@ -71,23 +63,28 @@ def test_py2_version_is_not_deprecated(sagemaker_session): assert estimator.py_version == "py2" -def test_py2_is_default_version_before_tf1_14(sagemaker_session): - estimator = _build_tf(sagemaker_session, framework_version="1.13") - assert estimator.py_version == "py2" - - def test_framework_name(sagemaker_session): - tf = _build_tf(sagemaker_session, framework_version="1.15.2") + tf = _build_tf(sagemaker_session, framework_version="1.15.2", py_version="py3") assert tf.__framework_name__ == "tensorflow" def test_enable_sm_metrics(sagemaker_session): - tf = _build_tf(sagemaker_session, enable_sagemaker_metrics=True) + tf = _build_tf( + sagemaker_session, + framework_version="1.15.2", + py_version="py3", + enable_sagemaker_metrics=True, + ) assert tf.enable_sagemaker_metrics def test_disable_sm_metrics(sagemaker_session): - tf = _build_tf(sagemaker_session, enable_sagemaker_metrics=False) + tf = _build_tf( + sagemaker_session, + framework_version="1.15.2", + py_version="py3", + enable_sagemaker_metrics=False, + ) assert not tf.enable_sagemaker_metrics @@ -95,7 +92,9 @@ def test_disable_sm_metrics_if_fw_ver_is_less_than_1_15(sagemaker_session, tf_ve if version.Version(tf_version) > version.Version("1.14"): pytest.skip("This test is for TF 1.14 and lower.") - tf = _build_tf(sagemaker_session, framework_version=tf_version, image_name="old-image") + tf = _build_tf( + sagemaker_session, framework_version=tf_version, py_version="py2", image_name="old-image" + ) assert tf.enable_sagemaker_metrics is None @@ -103,7 +102,7 @@ def test_enable_sm_metrics_if_fw_ver_is_at_least_1_15(sagemaker_session, tf_vers if version.Version(tf_version) < version.Version("1.15"): pytest.skip("This test is for TF 1.15 and higher.") - tf = _build_tf(sagemaker_session, framework_version=tf_version) + tf = _build_tf(sagemaker_session, framework_version=tf_version, py_version="py3") assert tf.enable_sagemaker_metrics @@ -112,7 +111,7 @@ def test_require_image_name_if_fw_ver_is_less_than_1_11(sagemaker_session, tf_ve pytest.skip("This test is for TF 1.10 and lower.") with pytest.raises(ValueError) as e: - _build_tf(sagemaker_session, framework_version=tf_version) + _build_tf(sagemaker_session, framework_version=tf_version, py_version="py2") expected_msg = ( "TF {version} supports only legacy mode. Please supply the image URI directly with " diff --git a/tests/unit/sagemaker/tensorflow/test_tfs.py b/tests/unit/sagemaker/tensorflow/test_tfs.py index ffab0afbaf..d287cc1e5a 100644 --- a/tests/unit/sagemaker/tensorflow/test_tfs.py +++ b/tests/unit/sagemaker/tensorflow/test_tfs.py @@ -22,6 +22,7 @@ from sagemaker.predictor import csv_serializer from sagemaker.tensorflow import TensorFlow from sagemaker.tensorflow.model import TensorFlowModel, TensorFlowPredictor +from tests.unit import PY_VERSION JSON_CONTENT_TYPE = "application/json" CSV_CONTENT_TYPE = "text/csv" @@ -76,6 +77,7 @@ def test_tfs_model(create_image_uri, sagemaker_session, tf_version): "s3://some/data.tar.gz", role=ROLE, framework_version=tf_version, + py_version=PY_VERSION, sagemaker_session=sagemaker_session, ) cdef = model.prepare_container_def(INSTANCE_TYPE) @@ -95,6 +97,7 @@ def test_tfs_model_accelerator(create_image_uri, sagemaker_session, tf_version): "s3://some/data.tar.gz", role=ROLE, framework_version=tf_version, + py_version=PY_VERSION, sagemaker_session=sagemaker_session, ) cdef = model.prepare_container_def(INSTANCE_TYPE, accelerator_type=ACCELERATOR_TYPE) @@ -112,6 +115,7 @@ def test_tfs_model_image_accelerator_not_supported(sagemaker_session): "s3://some/data.tar.gz", role=ROLE, framework_version="1.13.1", + py_version=PY_VERSION, sagemaker_session=sagemaker_session, ) @@ -125,6 +129,7 @@ def test_tfs_model_image_accelerator_not_supported(sagemaker_session): "s3://some/data.tar.gz", role=ROLE, framework_version="2.1", + py_version=PY_VERSION, sagemaker_session=sagemaker_session, ) @@ -147,6 +152,7 @@ def test_tfs_model_with_log_level(sagemaker_session, tf_version): "s3://some/data.tar.gz", role=ROLE, framework_version=tf_version, + py_version=PY_VERSION, container_log_level=logging.INFO, sagemaker_session=sagemaker_session, ) From 3c5951817ab8cea2316c050faea02dab60c2733a Mon Sep 17 00:00:00 2001 From: Eric Johnson <65414824+metrizable@users.noreply.github.com> Date: Thu, 11 Jun 2020 21:54:52 -0700 Subject: [PATCH 2/5] fix: py_version issues between tf_full and tf_serving --- tests/integ/test_data_capture_config.py | 15 +++++--------- tests/integ/test_model_monitor.py | 11 +++------- tests/integ/test_tf.py | 27 ++++++++++++------------- tests/integ/test_tfs.py | 20 ++++++++---------- 4 files changed, 29 insertions(+), 44 deletions(-) diff --git a/tests/integ/test_data_capture_config.py b/tests/integ/test_data_capture_config.py index f0fb20cc74..f5cd9417c3 100644 --- a/tests/integ/test_data_capture_config.py +++ b/tests/integ/test_data_capture_config.py @@ -13,7 +13,6 @@ from __future__ import absolute_import import os -import pytest import sagemaker import tests.integ @@ -21,6 +20,7 @@ from sagemaker.model_monitor import DataCaptureConfig, NetworkConfig from sagemaker.tensorflow.model import TensorFlowModel from sagemaker.utils import unique_name_from_base +from tests.integ import PYTHON_VERSION from tests.integ.retry import retries ROLE = "SageMakerRole" @@ -41,13 +41,8 @@ CUSTOM_JSON_CONTENT_TYPES = ["application/jsontype1", "application/jsontype2"] -@pytest.fixture(scope="module") -def py_version(tf_full_version, tf_serving_version): - return "py37" if tf_full_version == tf_serving_version else tests.integ.PYTHON_VERSION - - def test_enabling_data_capture_on_endpoint_shows_correct_data_capture_status( - sagemaker_session, tf_serving_version, py_version + sagemaker_session, tf_serving_version ): endpoint_name = unique_name_from_base("sagemaker-tensorflow-serving") model_data = sagemaker_session.upload_data( @@ -59,7 +54,7 @@ def test_enabling_data_capture_on_endpoint_shows_correct_data_capture_status( model_data=model_data, role=ROLE, framework_version=tf_serving_version, - py_version=py_version, + py_version=PYTHON_VERSION, sagemaker_session=sagemaker_session, ) predictor = model.deploy( @@ -117,7 +112,7 @@ def test_disabling_data_capture_on_endpoint_shows_correct_data_capture_status( model_data=model_data, role=ROLE, framework_version=tf_serving_version, - py_version=py_version, + py_version=PYTHON_VERSION, sagemaker_session=sagemaker_session, ) destination_s3_uri = os.path.join( @@ -204,7 +199,7 @@ def test_updating_data_capture_on_endpoint_shows_correct_data_capture_status( model_data=model_data, role=ROLE, framework_version=tf_serving_version, - py_version=py_version, + py_version=PYTHON_VERSION, sagemaker_session=sagemaker_session, ) destination_s3_uri = os.path.join( diff --git a/tests/integ/test_model_monitor.py b/tests/integ/test_model_monitor.py index 012bc86d8d..4d077ae618 100644 --- a/tests/integ/test_model_monitor.py +++ b/tests/integ/test_model_monitor.py @@ -24,7 +24,7 @@ from sagemaker.s3 import S3Uploader from datetime import datetime, timedelta -from tests.integ import DATA_DIR +from tests.integ import DATA_DIR, PYTHON_VERSION from sagemaker.model_monitor import DatasetFormat from sagemaker.model_monitor import NetworkConfig, Statistics, Constraints from sagemaker.model_monitor import ModelMonitor @@ -88,12 +88,7 @@ @pytest.fixture(scope="module") -def py_version(tf_full_version, tf_serving_version): - return "py37" if tf_full_version == tf_serving_version else tests.integ.PYTHON_VERSION - - -@pytest.fixture(scope="module") -def predictor(sagemaker_session, tf_serving_version, py_version): +def predictor(sagemaker_session, tf_serving_version): endpoint_name = unique_name_from_base("sagemaker-tensorflow-serving") model_data = sagemaker_session.upload_data( path=os.path.join(tests.integ.DATA_DIR, "tensorflow-serving-test-model.tar.gz"), @@ -106,7 +101,7 @@ def predictor(sagemaker_session, tf_serving_version, py_version): model_data=model_data, role=ROLE, framework_version=tf_serving_version, - py_version=py_version, + py_version=PYTHON_VERSION, sagemaker_session=sagemaker_session, ) predictor = model.deploy( diff --git a/tests/integ/test_tf.py b/tests/integ/test_tf.py index 08a6a66eed..d99a01aa77 100644 --- a/tests/integ/test_tf.py +++ b/tests/integ/test_tf.py @@ -19,12 +19,11 @@ import pytest from sagemaker.tensorflow import TensorFlow -from sagemaker.tensorflow.defaults import LATEST_SERVING_VERSION +from sagemaker.tensorflow.defaults import LATEST_VERSION, LATEST_SERVING_VERSION from sagemaker.utils import unique_name_from_base, sagemaker_timestamp import tests.integ -from tests.integ import timeout -from tests.integ import kms_utils +from tests.integ import kms_utils, timeout, PYTHON_VERSION from tests.integ.retry import retries from tests.integ.s3_utils import assert_s3_files_exist @@ -39,10 +38,12 @@ MPI_DISTRIBUTION = {"mpi": {"enabled": True}} TAGS = [{"Key": "some-key", "Value": "some-value"}] +PY37_SUPPORTED_FRAMEWORK_VERSION = [TensorFlow._LATEST_1X_VERSION, LATEST_VERSION] + @pytest.fixture(scope="module") -def py_version(tf_full_version, tf_serving_version): - return "py37" if tf_full_version == tf_serving_version else tests.integ.PYTHON_VERSION +def py_version(tf_full_version): + return "py37" if tf_full_version in PY37_SUPPORTED_FRAMEWORK_VERSION else PYTHON_VERSION def test_mnist_with_checkpoint_config( @@ -89,7 +90,7 @@ def test_mnist_with_checkpoint_config( assert actual_training_checkpoint_config == expected_training_checkpoint_config -def test_server_side_encryption(sagemaker_session, tf_serving_version, py_version): +def test_server_side_encryption(sagemaker_session, tf_serving_version): with kms_utils.bucket_with_encryption(sagemaker_session, ROLE) as (bucket_with_kms, kms_key): output_path = os.path.join( bucket_with_kms, "test-server-side-encryption", time.strftime("%y%m%d-%H%M") @@ -103,7 +104,7 @@ def test_server_side_encryption(sagemaker_session, tf_serving_version, py_versio train_instance_type="ml.c5.xlarge", sagemaker_session=sagemaker_session, framework_version=tf_serving_version, - py_version=py_version, + py_version=PYTHON_VERSION, code_location=output_path, output_path=output_path, model_dir="/opt/ml/model", @@ -154,13 +155,13 @@ def test_mnist_distributed(sagemaker_session, instance_type, tf_full_version, py ) -def test_mnist_async(sagemaker_session, cpu_instance_type, tf_full_version, py_version): +def test_mnist_async(sagemaker_session, cpu_instance_type): estimator = TensorFlow( entry_point=SCRIPT, role=ROLE, train_instance_count=1, train_instance_type="ml.c5.4xlarge", - py_version=tests.integ.PYTHON_VERSION, + py_version=PYTHON_VERSION, sagemaker_session=sagemaker_session, # testing py-sdk functionality, no need to run against all TF versions framework_version=LATEST_SERVING_VERSION, @@ -195,18 +196,16 @@ def test_mnist_async(sagemaker_session, cpu_instance_type, tf_full_version, py_v _assert_model_name_match(sagemaker_session.sagemaker_client, endpoint_name, model_name) -def test_deploy_with_input_handlers( - sagemaker_session, instance_type, tf_serving_version, py_version -): +def test_deploy_with_input_handlers(sagemaker_session, instance_type, tf_serving_version): estimator = TensorFlow( entry_point="training.py", source_dir=TFS_RESOURCE_PATH, role=ROLE, train_instance_count=1, train_instance_type=instance_type, - py_version=py_version, - sagemaker_session=sagemaker_session, framework_version=tf_serving_version, + py_version=PYTHON_VERSION, + sagemaker_session=sagemaker_session, tags=TAGS, ) diff --git a/tests/integ/test_tfs.py b/tests/integ/test_tfs.py index 4e035e0ead..9d130da927 100644 --- a/tests/integ/test_tfs.py +++ b/tests/integ/test_tfs.py @@ -24,15 +24,11 @@ import tests.integ import tests.integ.timeout from sagemaker.tensorflow.model import TensorFlowModel, TensorFlowPredictor +from tests.integ import PYTHON_VERSION @pytest.fixture(scope="module") -def py_version(tf_full_version, tf_serving_version): - return "py37" if tf_full_version == tf_serving_version else tests.integ.PYTHON_VERSION - - -@pytest.fixture(scope="module") -def tfs_predictor(sagemaker_session, tf_serving_version, py_version): +def tfs_predictor(sagemaker_session, tf_serving_version): endpoint_name = sagemaker.utils.unique_name_from_base("sagemaker-tensorflow-serving") model_data = sagemaker_session.upload_data( path=os.path.join(tests.integ.DATA_DIR, "tensorflow-serving-test-model.tar.gz"), @@ -43,7 +39,7 @@ def tfs_predictor(sagemaker_session, tf_serving_version, py_version): model_data=model_data, role="SageMakerRole", framework_version=tf_serving_version, - py_version=py_version, + py_version=PYTHON_VERSION, sagemaker_session=sagemaker_session, ) predictor = model.deploy(1, "ml.c5.xlarge", endpoint_name=endpoint_name) @@ -60,7 +56,7 @@ def tar_dir(directory, tmpdir): @pytest.fixture def tfs_predictor_with_model_and_entry_point_same_tar( - sagemaker_local_session, tf_serving_version, py_version, tmpdir + sagemaker_local_session, tf_serving_version, tmpdir ): endpoint_name = sagemaker.utils.unique_name_from_base("sagemaker-tensorflow-serving") @@ -72,7 +68,7 @@ def tfs_predictor_with_model_and_entry_point_same_tar( model_data="file://" + model_tar, role="SageMakerRole", framework_version=tf_serving_version, - py_version=py_version, + py_version=PYTHON_VERSION, sagemaker_session=sagemaker_local_session, ) predictor = model.deploy(1, "local", endpoint_name=endpoint_name) @@ -85,7 +81,7 @@ def tfs_predictor_with_model_and_entry_point_same_tar( @pytest.fixture(scope="module") def tfs_predictor_with_model_and_entry_point_and_dependencies( - sagemaker_local_session, tf_serving_version, py_version + sagemaker_local_session, tf_serving_version ): endpoint_name = sagemaker.utils.unique_name_from_base("sagemaker-tensorflow-serving") @@ -106,7 +102,7 @@ def tfs_predictor_with_model_and_entry_point_and_dependencies( role="SageMakerRole", dependencies=dependencies, framework_version=tf_serving_version, - py_version=py_version, + py_version=PYTHON_VERSION, sagemaker_session=sagemaker_local_session, ) @@ -130,7 +126,7 @@ def tfs_predictor_with_accelerator(sagemaker_session, ei_tf_full_version, cpu_in model_data=model_data, role="SageMakerRole", framework_version=ei_tf_full_version, - py_version=tests.integ.PYTHON_VERSION, + py_version=PYTHON_VERSION, sagemaker_session=sagemaker_session, ) predictor = model.deploy( From 735c5ef29575c199ca92e316cd6ec5e20f8667d6 Mon Sep 17 00:00:00 2001 From: Eric Johnson <65414824+metrizable@users.noreply.github.com> Date: Fri, 12 Jun 2020 07:26:32 -0700 Subject: [PATCH 3/5] fix: too careless removing fixtures and not testing all integs --- tests/integ/test_data_capture_config.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integ/test_data_capture_config.py b/tests/integ/test_data_capture_config.py index f5cd9417c3..553173ec87 100644 --- a/tests/integ/test_data_capture_config.py +++ b/tests/integ/test_data_capture_config.py @@ -100,7 +100,7 @@ def test_enabling_data_capture_on_endpoint_shows_correct_data_capture_status( def test_disabling_data_capture_on_endpoint_shows_correct_data_capture_status( - sagemaker_session, tf_serving_version, py_version + sagemaker_session, tf_serving_version ): endpoint_name = unique_name_from_base("sagemaker-tensorflow-serving") model_data = sagemaker_session.upload_data( @@ -187,7 +187,7 @@ def test_disabling_data_capture_on_endpoint_shows_correct_data_capture_status( def test_updating_data_capture_on_endpoint_shows_correct_data_capture_status( - sagemaker_session, tf_serving_version, py_version + sagemaker_session, tf_serving_version ): endpoint_name = sagemaker.utils.unique_name_from_base("sagemaker-tensorflow-serving") model_data = sagemaker_session.upload_data( From 210247c0f027cd8f598c2c39617ff836ed2de429 Mon Sep 17 00:00:00 2001 From: Eric Johnson <65414824+metrizable@users.noreply.github.com> Date: Fri, 12 Jun 2020 12:49:29 -0700 Subject: [PATCH 4/5] fix: updates based on PR feedback --- .../tensorflow/upgrade_from_legacy.rst | 2 +- src/sagemaker/rl/estimator.py | 4 +- src/sagemaker/tensorflow/estimator.py | 12 +--- src/sagemaker/tensorflow/model.py | 18 +++--- tests/conftest.py | 28 +++++++- tests/integ/test_airflow_config.py | 2 +- tests/integ/test_data_capture_config.py | 4 -- tests/integ/test_model_monitor.py | 3 +- tests/integ/test_tf.py | 17 ++--- tests/integ/test_tfs.py | 5 -- tests/integ/test_tuner.py | 28 ++++---- .../sagemaker/tensorflow/test_estimator.py | 64 ++++++++++++------- .../tensorflow/test_estimator_init.py | 19 ++++-- tests/unit/sagemaker/tensorflow/test_tfs.py | 6 -- 14 files changed, 112 insertions(+), 100 deletions(-) diff --git a/doc/frameworks/tensorflow/upgrade_from_legacy.rst b/doc/frameworks/tensorflow/upgrade_from_legacy.rst index a0da2413e6..51b80e8d59 100644 --- a/doc/frameworks/tensorflow/upgrade_from_legacy.rst +++ b/doc/frameworks/tensorflow/upgrade_from_legacy.rst @@ -104,7 +104,7 @@ the difference in code would be as follows: ... source_dir="code", framework_version="1.10.0", - py_version="py3", + py_version="py2", train_instance_type="ml.m4.xlarge", image_name="520713654638.dkr.ecr.us-west-2.amazonaws.com/sagemaker-tensorflow:1.10.0-cpu-py2", hyperparameters={ diff --git a/src/sagemaker/rl/estimator.py b/src/sagemaker/rl/estimator.py index d41a0d6484..a44992cd8a 100644 --- a/src/sagemaker/rl/estimator.py +++ b/src/sagemaker/rl/estimator.py @@ -254,9 +254,7 @@ def create_model( ) if self.framework == RLFramework.TENSORFLOW.value: - return TensorFlowModel( - framework_version=self.framework_version, py_version=PYTHON_VERSION, **base_args - ) + return TensorFlowModel(framework_version=self.framework_version, **base_args) if self.framework == RLFramework.MXNET.value: return MXNetModel( framework_version=self.framework_version, py_version=PYTHON_VERSION, **extended_args diff --git a/src/sagemaker/tensorflow/estimator.py b/src/sagemaker/tensorflow/estimator.py index 7324fdc3fd..e1c01e99ce 100644 --- a/src/sagemaker/tensorflow/estimator.py +++ b/src/sagemaker/tensorflow/estimator.py @@ -56,8 +56,7 @@ def __init__( Args: py_version (str): Python version you want to use for executing your model training - code. One of 'py2', 'py3', or 'py37'. Defaults to ``None``. Required unless - ``image_name`` is provided. + code. Defaults to ``None``. Required unless ``image_name`` is provided. framework_version (str): TensorFlow version you want to use for executing your model training code. Defaults to ``None``. Required unless ``image_name`` is provided. List of supported versions: @@ -144,15 +143,11 @@ def __init__( self.model_dir = model_dir self.distributions = distributions or {} - self._validate_args(py_version=py_version, framework_version=self.framework_version) + self._validate_args(py_version=py_version) - def _validate_args(self, py_version, framework_version): + def _validate_args(self, py_version): """Placeholder docstring""" - if py_version: - if framework_version is None: - raise AttributeError(fw.EMPTY_FRAMEWORK_VERSION_ERROR) - if py_version == "py2" and self._only_python_3_supported(): msg = ( "Python 2 containers are only available with {} and lower versions. " @@ -293,7 +288,6 @@ def create_model( role=role or self.role, container_log_level=self.container_log_level, framework_version=self.framework_version, - py_version=self.py_version, sagemaker_session=self.sagemaker_session, vpc_config=self.get_vpc_config(vpc_config_override), entry_point=entry_point, diff --git a/src/sagemaker/tensorflow/model.py b/src/sagemaker/tensorflow/model.py index 15c6ccae25..6c1e15fda0 100644 --- a/src/sagemaker/tensorflow/model.py +++ b/src/sagemaker/tensorflow/model.py @@ -17,7 +17,7 @@ import sagemaker from sagemaker.content_types import CONTENT_TYPE_JSON -from sagemaker.fw_utils import create_image_uri, validate_version_or_image_args +from sagemaker.fw_utils import create_image_uri from sagemaker.predictor import json_serializer, json_deserializer @@ -138,7 +138,6 @@ def __init__( entry_point=None, image=None, framework_version=None, - py_version=None, container_log_level=None, predictor_cls=TensorFlowPredictor, **kwargs @@ -159,15 +158,11 @@ def __init__( must point to a file located at the root of ``source_dir``. image (str): A Docker image URI (default: None). If not specified, a default image for TensorFlow Serving will be used. If - ``framework_version`` or ``py_version`` are ``None``, then - ``image`` is required. If also ``None``, then a ``ValueError`` - will be raised. + ``framework_version`` is ``None``, then ``image`` is required. + If also ``None``, then a ``ValueError`` will be raised. framework_version (str): Optional. TensorFlow Serving version you want to use. Defaults to ``None``. Required unless ``image`` is provided. - py_version (str): Python version you want to use for executing your - model training code. One of 'py2', 'py3', or 'py37'. Defaults to - ``None``. Required unless ``image`` is provided. container_log_level (int): Log level to use within the container (default: logging.ERROR). Valid values are defined in the Python logging module. @@ -183,9 +178,12 @@ def __init__( :class:`~sagemaker.model.FrameworkModel` and :class:`~sagemaker.model.Model`. """ - validate_version_or_image_args(framework_version, py_version, image) + if framework_version is None and image is None: + raise ValueError( + "Both framework_version and image were None. " + "Either specify framework_version or specify image_name." + ) self.framework_version = framework_version - self.py_version = py_version super(TensorFlowModel, self).__init__( model_data=model_data, diff --git a/tests/conftest.py b/tests/conftest.py index 327250753b..7bfca7cd83 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -209,7 +209,6 @@ def xgboost_version(request): "1.11.0", "1.12", "1.12.0", - "1.13", "1.14", "1.14.0", "1.15", @@ -226,6 +225,16 @@ def tf_version(request): return request.param +@pytest.fixture(scope="module", params=["py2", "py3"]) +def tf_py_version(tf_version, request): + version = [int(val) for val in tf_version.split(".")] + if version < [1, 13]: + return "py2" + if version < [2, 2]: + return request.param + return "py37" + + @pytest.fixture(scope="module", params=["0.10.1", "0.10.1", "0.11", "0.11.0", "0.11.1"]) def rl_coach_tf_version(request): return request.param @@ -290,6 +299,23 @@ def tf_full_version(request): return tf_version +@pytest.fixture(scope="module") +def tf_full_py_version(tf_full_version, request): + """fixture to match tf_full_version + + Fixture exists as such, since tf_full_version may be overridden --tf-full-version. + Otherwise, this would simply be py37 to match the latest version support. + + TODO: Evaluate use of --tf-full-version with possible eye to remove and simplify code. + """ + version = [int(val) for val in tf_full_version.split(".")] + if version < [1, 13]: + return "py2" + if tf_full_version in [TensorFlow._LATEST_1X_VERSION, LATEST_VERSION]: + return "py37" + return "py3" + + @pytest.fixture(scope="module", params=["1.15.0", "2.0.0"]) def ei_tf_full_version(request): tf_ei_version = request.config.getoption("--ei-tf-full-version") diff --git a/tests/integ/test_airflow_config.py b/tests/integ/test_airflow_config.py index 03011115de..87953b7d33 100644 --- a/tests/integ/test_airflow_config.py +++ b/tests/integ/test_airflow_config.py @@ -562,7 +562,7 @@ def test_tf_airflow_config_uploads_data_source_to_s3(sagemaker_session, cpu_inst train_instance_type=cpu_instance_type, sagemaker_session=sagemaker_session, framework_version=TensorFlow.LATEST_VERSION, - py_version=PYTHON_VERSION, + py_version="py37", # only version available with 2.2.0 metric_definitions=[ {"Name": "train:global_steps", "Regex": r"global_step\/sec:\s(.*)"} ], diff --git a/tests/integ/test_data_capture_config.py b/tests/integ/test_data_capture_config.py index 553173ec87..b4ad979fcf 100644 --- a/tests/integ/test_data_capture_config.py +++ b/tests/integ/test_data_capture_config.py @@ -20,7 +20,6 @@ from sagemaker.model_monitor import DataCaptureConfig, NetworkConfig from sagemaker.tensorflow.model import TensorFlowModel from sagemaker.utils import unique_name_from_base -from tests.integ import PYTHON_VERSION from tests.integ.retry import retries ROLE = "SageMakerRole" @@ -54,7 +53,6 @@ def test_enabling_data_capture_on_endpoint_shows_correct_data_capture_status( model_data=model_data, role=ROLE, framework_version=tf_serving_version, - py_version=PYTHON_VERSION, sagemaker_session=sagemaker_session, ) predictor = model.deploy( @@ -112,7 +110,6 @@ def test_disabling_data_capture_on_endpoint_shows_correct_data_capture_status( model_data=model_data, role=ROLE, framework_version=tf_serving_version, - py_version=PYTHON_VERSION, sagemaker_session=sagemaker_session, ) destination_s3_uri = os.path.join( @@ -199,7 +196,6 @@ def test_updating_data_capture_on_endpoint_shows_correct_data_capture_status( model_data=model_data, role=ROLE, framework_version=tf_serving_version, - py_version=PYTHON_VERSION, sagemaker_session=sagemaker_session, ) destination_s3_uri = os.path.join( diff --git a/tests/integ/test_model_monitor.py b/tests/integ/test_model_monitor.py index 4d077ae618..c876b696b9 100644 --- a/tests/integ/test_model_monitor.py +++ b/tests/integ/test_model_monitor.py @@ -24,7 +24,7 @@ from sagemaker.s3 import S3Uploader from datetime import datetime, timedelta -from tests.integ import DATA_DIR, PYTHON_VERSION +from tests.integ import DATA_DIR from sagemaker.model_monitor import DatasetFormat from sagemaker.model_monitor import NetworkConfig, Statistics, Constraints from sagemaker.model_monitor import ModelMonitor @@ -101,7 +101,6 @@ def predictor(sagemaker_session, tf_serving_version): model_data=model_data, role=ROLE, framework_version=tf_serving_version, - py_version=PYTHON_VERSION, sagemaker_session=sagemaker_session, ) predictor = model.deploy( diff --git a/tests/integ/test_tf.py b/tests/integ/test_tf.py index d99a01aa77..b4cdbe7f1c 100644 --- a/tests/integ/test_tf.py +++ b/tests/integ/test_tf.py @@ -19,7 +19,7 @@ import pytest from sagemaker.tensorflow import TensorFlow -from sagemaker.tensorflow.defaults import LATEST_VERSION, LATEST_SERVING_VERSION +from sagemaker.tensorflow.defaults import LATEST_SERVING_VERSION from sagemaker.utils import unique_name_from_base, sagemaker_timestamp import tests.integ @@ -38,16 +38,9 @@ MPI_DISTRIBUTION = {"mpi": {"enabled": True}} TAGS = [{"Key": "some-key", "Value": "some-value"}] -PY37_SUPPORTED_FRAMEWORK_VERSION = [TensorFlow._LATEST_1X_VERSION, LATEST_VERSION] - - -@pytest.fixture(scope="module") -def py_version(tf_full_version): - return "py37" if tf_full_version in PY37_SUPPORTED_FRAMEWORK_VERSION else PYTHON_VERSION - def test_mnist_with_checkpoint_config( - sagemaker_session, instance_type, tf_full_version, py_version + sagemaker_session, instance_type, tf_full_version, tf_full_py_version ): checkpoint_s3_uri = "s3://{}/checkpoints/tf-{}".format( sagemaker_session.default_bucket(), sagemaker_timestamp() @@ -60,7 +53,7 @@ def test_mnist_with_checkpoint_config( train_instance_type=instance_type, sagemaker_session=sagemaker_session, framework_version=tf_full_version, - py_version=py_version, + py_version=tf_full_py_version, metric_definitions=[{"Name": "train:global_steps", "Regex": r"global_step\/sec:\s(.*)"}], checkpoint_s3_uri=checkpoint_s3_uri, checkpoint_local_path=checkpoint_local_path, @@ -131,7 +124,7 @@ def test_server_side_encryption(sagemaker_session, tf_serving_version): @pytest.mark.canary_quick -def test_mnist_distributed(sagemaker_session, instance_type, tf_full_version, py_version): +def test_mnist_distributed(sagemaker_session, instance_type, tf_full_version, tf_full_py_version): estimator = TensorFlow( entry_point=SCRIPT, role=ROLE, @@ -139,7 +132,7 @@ def test_mnist_distributed(sagemaker_session, instance_type, tf_full_version, py train_instance_type=instance_type, sagemaker_session=sagemaker_session, framework_version=tf_full_version, - py_version=py_version, + py_version=tf_full_py_version, distributions=PARAMETER_SERVER_DISTRIBUTION, ) inputs = estimator.sagemaker_session.upload_data( diff --git a/tests/integ/test_tfs.py b/tests/integ/test_tfs.py index 9d130da927..f009e3063d 100644 --- a/tests/integ/test_tfs.py +++ b/tests/integ/test_tfs.py @@ -24,7 +24,6 @@ import tests.integ import tests.integ.timeout from sagemaker.tensorflow.model import TensorFlowModel, TensorFlowPredictor -from tests.integ import PYTHON_VERSION @pytest.fixture(scope="module") @@ -39,7 +38,6 @@ def tfs_predictor(sagemaker_session, tf_serving_version): model_data=model_data, role="SageMakerRole", framework_version=tf_serving_version, - py_version=PYTHON_VERSION, sagemaker_session=sagemaker_session, ) predictor = model.deploy(1, "ml.c5.xlarge", endpoint_name=endpoint_name) @@ -68,7 +66,6 @@ def tfs_predictor_with_model_and_entry_point_same_tar( model_data="file://" + model_tar, role="SageMakerRole", framework_version=tf_serving_version, - py_version=PYTHON_VERSION, sagemaker_session=sagemaker_local_session, ) predictor = model.deploy(1, "local", endpoint_name=endpoint_name) @@ -102,7 +99,6 @@ def tfs_predictor_with_model_and_entry_point_and_dependencies( role="SageMakerRole", dependencies=dependencies, framework_version=tf_serving_version, - py_version=PYTHON_VERSION, sagemaker_session=sagemaker_local_session, ) @@ -126,7 +122,6 @@ def tfs_predictor_with_accelerator(sagemaker_session, ei_tf_full_version, cpu_in model_data=model_data, role="SageMakerRole", framework_version=ei_tf_full_version, - py_version=PYTHON_VERSION, sagemaker_session=sagemaker_session, ) predictor = model.deploy( diff --git a/tests/integ/test_tuner.py b/tests/integ/test_tuner.py index f494e5cbbd..199b695298 100644 --- a/tests/integ/test_tuner.py +++ b/tests/integ/test_tuner.py @@ -37,7 +37,6 @@ from sagemaker.predictor import json_deserializer from sagemaker.pytorch import PyTorch from sagemaker.tensorflow import TensorFlow -from sagemaker.tensorflow.defaults import LATEST_VERSION from sagemaker.tuner import ( IntegerParameter, ContinuousParameter, @@ -52,13 +51,6 @@ DATA_PATH = os.path.join(DATA_DIR, "iris", "data") -PY37_SUPPORTED_FRAMEWORK_VERSION = [TensorFlow._LATEST_1X_VERSION, LATEST_VERSION] - - -@pytest.fixture(scope="module") -def py_version(tf_full_version): - return "py37" if tf_full_version in PY37_SUPPORTED_FRAMEWORK_VERSION else PYTHON_VERSION - @pytest.fixture(scope="module") def kmeans_train_set(sagemaker_session): @@ -598,7 +590,9 @@ def test_tuning_mxnet(sagemaker_session, mxnet_full_version, cpu_instance_type): @pytest.mark.canary_quick -def test_tuning_tf_script_mode(sagemaker_session, cpu_instance_type, tf_full_version, py_version): +def test_tuning_tf_script_mode( + sagemaker_session, cpu_instance_type, tf_full_version, tf_full_py_version +): resource_path = os.path.join(DATA_DIR, "tensorflow_mnist") script_path = os.path.join(resource_path, "mnist.py") @@ -609,7 +603,7 @@ def test_tuning_tf_script_mode(sagemaker_session, cpu_instance_type, tf_full_ver train_instance_type=cpu_instance_type, sagemaker_session=sagemaker_session, framework_version=tf_full_version, - py_version=py_version, + py_version=tf_full_py_version, ) hyperparameter_ranges = {"epochs": IntegerParameter(1, 2)} @@ -639,9 +633,10 @@ def test_tuning_tf_script_mode(sagemaker_session, cpu_instance_type, tf_full_ver tuner.wait() +# TODO: evaluate skip mark and default framework_version 1.11 @pytest.mark.canary_quick @pytest.mark.skipif(PYTHON_VERSION != "py2", reason="TensorFlow image supports only python 2.") -def test_tuning_tf(sagemaker_session, cpu_instance_type, tf_full_version, py_version): +def test_tuning_tf(sagemaker_session, cpu_instance_type): with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES): script_path = os.path.join(DATA_DIR, "iris", "iris-dnn-classifier.py") @@ -654,8 +649,8 @@ def test_tuning_tf(sagemaker_session, cpu_instance_type, tf_full_version, py_ver train_instance_count=1, train_instance_type=cpu_instance_type, sagemaker_session=sagemaker_session, - framework_version=tf_full_version, - py_version=py_version, + framework_version="1.11", + py_version=PYTHON_VERSION, ) inputs = sagemaker_session.upload_data(path=DATA_PATH, key_prefix="integ-test-data/tf_iris") @@ -695,8 +690,9 @@ def test_tuning_tf(sagemaker_session, cpu_instance_type, tf_full_version, py_ver assert dict_result == list_result +# TODO: evaluate skip mark and default framework_version 1.11 @pytest.mark.skipif(PYTHON_VERSION != "py2", reason="TensorFlow image supports only python 2.") -def test_tuning_tf_vpc_multi(sagemaker_session, cpu_instance_type, tf_full_version, py_version): +def test_tuning_tf_vpc_multi(sagemaker_session, cpu_instance_type): """Test Tensorflow multi-instance using the same VpcConfig for training and inference""" instance_type = cpu_instance_type instance_count = 2 @@ -720,8 +716,8 @@ def test_tuning_tf_vpc_multi(sagemaker_session, cpu_instance_type, tf_full_versi subnets=subnet_ids, security_group_ids=[security_group_id], encrypt_inter_container_traffic=True, - framework_version=tf_full_version, - py_version=py_version, + framework_version="1.11", + py_version=PYTHON_VERSION, ) inputs = sagemaker_session.upload_data(path=DATA_PATH, key_prefix="integ-test-data/tf_iris") diff --git a/tests/unit/sagemaker/tensorflow/test_estimator.py b/tests/unit/sagemaker/tensorflow/test_estimator.py index 28ff7d9f5a..50005490fb 100644 --- a/tests/unit/sagemaker/tensorflow/test_estimator.py +++ b/tests/unit/sagemaker/tensorflow/test_estimator.py @@ -22,8 +22,7 @@ from sagemaker.estimator import _TrainingJob from sagemaker.tensorflow import model, TensorFlow -from sagemaker.tensorflow.defaults import TF_VERSION -from tests.unit import DATA_DIR, PY_VERSION +from tests.unit import DATA_DIR SCRIPT_FILE = "dummy_script.py" SCRIPT_PATH = os.path.join(DATA_DIR, SCRIPT_FILE) @@ -145,8 +144,8 @@ def _create_train_job(tf_version, horovod=False, ps=False, py_version="py2"): def _build_tf( sagemaker_session, - framework_version=TF_VERSION, - py_version=PY_VERSION, + framework_version=None, + py_version=None, train_instance_type=None, base_job_name=None, **kwargs @@ -164,7 +163,7 @@ def _build_tf( ) -def test_create_model(sagemaker_session, tf_version): +def test_create_model(sagemaker_session, tf_version, tf_py_version): if version.Version(tf_version) < version.Version("1.11"): pytest.skip( "Legacy TF version requires explicit image URI, and " @@ -176,7 +175,7 @@ def test_create_model(sagemaker_session, tf_version): tf = TensorFlow( entry_point=SCRIPT_PATH, framework_version=tf_version, - py_version=PY_VERSION, + py_version=tf_py_version, role=ROLE, sagemaker_session=sagemaker_session, train_instance_count=INSTANCE_COUNT, @@ -202,7 +201,7 @@ def test_create_model(sagemaker_session, tf_version): assert model.enable_network_isolation() -def test_create_model_with_optional_params(sagemaker_session, tf_version): +def test_create_model_with_optional_params(sagemaker_session, tf_version, tf_py_version): if version.Version(tf_version) < version.Version("1.11"): pytest.skip( "Legacy TF version requires explicit image URI, and " @@ -215,7 +214,7 @@ def test_create_model_with_optional_params(sagemaker_session, tf_version): tf = TensorFlow( entry_point=SCRIPT_PATH, framework_version=tf_version, - py_version=PY_VERSION, + py_version=tf_py_version, role=ROLE, sagemaker_session=sagemaker_session, train_instance_count=INSTANCE_COUNT, @@ -271,7 +270,9 @@ def test_create_model_with_custom_image(sagemaker_session): @patch("sagemaker.tensorflow.estimator.TensorFlow.create_model") -def test_transformer_creation_with_optional_args(create_model, sagemaker_session, tf_version): +def test_transformer_creation_with_optional_args( + create_model, sagemaker_session, tf_version, tf_py_version +): if version.Version(tf_version) < version.Version("1.11"): pytest.skip( "Legacy TF version requires explicit image URI, and " @@ -284,7 +285,7 @@ def test_transformer_creation_with_optional_args(create_model, sagemaker_session tf = TensorFlow( entry_point=SCRIPT_PATH, framework_version=tf_version, - py_version=PY_VERSION, + py_version=tf_py_version, role=ROLE, sagemaker_session=sagemaker_session, train_instance_count=INSTANCE_COUNT, @@ -349,7 +350,9 @@ def test_transformer_creation_with_optional_args(create_model, sagemaker_session @patch("sagemaker.tensorflow.estimator.TensorFlow.create_model") -def test_transformer_creation_without_optional_args(create_model, sagemaker_session, tf_version): +def test_transformer_creation_without_optional_args( + create_model, sagemaker_session, tf_version, tf_py_version +): if version.Version(tf_version) < version.Version("1.11"): pytest.skip( "Legacy TF version requires explicit image URI, and " @@ -362,7 +365,7 @@ def test_transformer_creation_without_optional_args(create_model, sagemaker_sess tf = TensorFlow( entry_point=SCRIPT_PATH, framework_version=tf_version, - py_version=PY_VERSION, + py_version=tf_py_version, role=ROLE, sagemaker_session=sagemaker_session, train_instance_count=INSTANCE_COUNT, @@ -395,7 +398,12 @@ def test_transformer_creation_without_optional_args(create_model, sagemaker_sess def test_script_mode_create_model(sagemaker_session): - tf = _build_tf(sagemaker_session=sagemaker_session, enable_network_isolation=True) + tf = _build_tf( + sagemaker_session=sagemaker_session, + framework_version="1.11", + py_version="py2", + enable_network_isolation=True, + ) tf._prepare_for_training() # set output_path and job name as if training happened tf_model = tf.create_model() @@ -417,8 +425,8 @@ def test_script_mode_create_model(sagemaker_session): def test_fit(time, strftime, sagemaker_session): tf = TensorFlow( entry_point=SCRIPT_FILE, - framework_version=TF_VERSION, - py_version=PY_VERSION, + framework_version="1.11", + py_version="py2", role=ROLE, sagemaker_session=sagemaker_session, train_instance_type=INSTANCE_TYPE, @@ -432,7 +440,7 @@ def test_fit(time, strftime, sagemaker_session): call_names = [c[0] for c in sagemaker_session.method_calls] assert call_names == ["train", "logs_for_job"] - expected_train_args = _create_train_job("1.11", py_version="py3") + expected_train_args = _create_train_job("1.11", py_version="py2") expected_train_args["input_config"][0]["DataSource"]["S3DataSource"]["S3Uri"] = inputs actual_train_args = sagemaker_session.method_calls[0][2] @@ -445,8 +453,8 @@ def test_fit(time, strftime, sagemaker_session): def test_fit_ps(time, strftime, sagemaker_session): tf = TensorFlow( entry_point=SCRIPT_FILE, - framework_version=TF_VERSION, - py_version=PY_VERSION, + framework_version="1.11", + py_version="py2", role=ROLE, sagemaker_session=sagemaker_session, train_instance_type=INSTANCE_TYPE, @@ -461,7 +469,7 @@ def test_fit_ps(time, strftime, sagemaker_session): call_names = [c[0] for c in sagemaker_session.method_calls] assert call_names == ["train", "logs_for_job"] - expected_train_args = _create_train_job("1.11", ps=True, py_version="py3") + expected_train_args = _create_train_job("1.11", ps=True, py_version="py2") expected_train_args["input_config"][0]["DataSource"]["S3DataSource"]["S3Uri"] = inputs expected_train_args["hyperparameters"][TensorFlow.LAUNCH_PS_ENV_NAME] = json.dumps(True) @@ -475,8 +483,8 @@ def test_fit_ps(time, strftime, sagemaker_session): def test_fit_mpi(time, strftime, sagemaker_session): tf = TensorFlow( entry_point=SCRIPT_FILE, - framework_version=TF_VERSION, - py_version=PY_VERSION, + framework_version="1.11", + py_version="py2", role=ROLE, sagemaker_session=sagemaker_session, train_instance_type=INSTANCE_TYPE, @@ -491,7 +499,7 @@ def test_fit_mpi(time, strftime, sagemaker_session): call_names = [c[0] for c in sagemaker_session.method_calls] assert call_names == ["train", "logs_for_job"] - expected_train_args = _create_train_job("1.11", horovod=True, py_version="py3") + expected_train_args = _create_train_job("1.11", horovod=True, py_version="py2") expected_train_args["input_config"][0]["DataSource"]["S3DataSource"]["S3Uri"] = inputs expected_train_args["hyperparameters"][TensorFlow.LAUNCH_MPI_ENV_NAME] = json.dumps(True) expected_train_args["hyperparameters"][TensorFlow.MPI_NUM_PROCESSES_PER_HOST] = json.dumps(2) @@ -503,8 +511,16 @@ def test_fit_mpi(time, strftime, sagemaker_session): assert actual_train_args == expected_train_args -def test_hyperparameters_no_model_dir(sagemaker_session): - tf = _build_tf(sagemaker_session, model_dir=False) +def test_hyperparameters_no_model_dir(sagemaker_session, tf_version, tf_py_version): + if version.Version(tf_version) < version.Version("1.11"): + pytest.skip( + "Legacy TF version requires explicit image URI, and " + "this logic is tested in test_create_model_with_custom_image." + ) + + tf = _build_tf( + sagemaker_session, framework_version=tf_version, py_version=tf_py_version, model_dir=False + ) hyperparameters = tf.hyperparameters() assert "model_dir" not in hyperparameters diff --git a/tests/unit/sagemaker/tensorflow/test_estimator_init.py b/tests/unit/sagemaker/tensorflow/test_estimator_init.py index 4e9e6aea79..86b6df3065 100644 --- a/tests/unit/sagemaker/tensorflow/test_estimator_init.py +++ b/tests/unit/sagemaker/tensorflow/test_estimator_init.py @@ -88,30 +88,37 @@ def test_disable_sm_metrics(sagemaker_session): assert not tf.enable_sagemaker_metrics -def test_disable_sm_metrics_if_fw_ver_is_less_than_1_15(sagemaker_session, tf_version): +def test_disable_sm_metrics_if_fw_ver_is_less_than_1_15( + sagemaker_session, tf_version, tf_py_version +): if version.Version(tf_version) > version.Version("1.14"): pytest.skip("This test is for TF 1.14 and lower.") tf = _build_tf( - sagemaker_session, framework_version=tf_version, py_version="py2", image_name="old-image" + sagemaker_session, + framework_version=tf_version, + py_version=tf_py_version, + image_name="old-image", ) assert tf.enable_sagemaker_metrics is None -def test_enable_sm_metrics_if_fw_ver_is_at_least_1_15(sagemaker_session, tf_version): +def test_enable_sm_metrics_if_fw_ver_is_at_least_1_15(sagemaker_session, tf_version, tf_py_version): if version.Version(tf_version) < version.Version("1.15"): pytest.skip("This test is for TF 1.15 and higher.") - tf = _build_tf(sagemaker_session, framework_version=tf_version, py_version="py3") + tf = _build_tf(sagemaker_session, framework_version=tf_version, py_version=tf_py_version) assert tf.enable_sagemaker_metrics -def test_require_image_name_if_fw_ver_is_less_than_1_11(sagemaker_session, tf_version): +def test_require_image_name_if_fw_ver_is_less_than_1_11( + sagemaker_session, tf_version, tf_py_version +): if version.Version(tf_version) > version.Version("1.10"): pytest.skip("This test is for TF 1.10 and lower.") with pytest.raises(ValueError) as e: - _build_tf(sagemaker_session, framework_version=tf_version, py_version="py2") + _build_tf(sagemaker_session, framework_version=tf_version, py_version=tf_py_version) expected_msg = ( "TF {version} supports only legacy mode. Please supply the image URI directly with " diff --git a/tests/unit/sagemaker/tensorflow/test_tfs.py b/tests/unit/sagemaker/tensorflow/test_tfs.py index d287cc1e5a..ffab0afbaf 100644 --- a/tests/unit/sagemaker/tensorflow/test_tfs.py +++ b/tests/unit/sagemaker/tensorflow/test_tfs.py @@ -22,7 +22,6 @@ from sagemaker.predictor import csv_serializer from sagemaker.tensorflow import TensorFlow from sagemaker.tensorflow.model import TensorFlowModel, TensorFlowPredictor -from tests.unit import PY_VERSION JSON_CONTENT_TYPE = "application/json" CSV_CONTENT_TYPE = "text/csv" @@ -77,7 +76,6 @@ def test_tfs_model(create_image_uri, sagemaker_session, tf_version): "s3://some/data.tar.gz", role=ROLE, framework_version=tf_version, - py_version=PY_VERSION, sagemaker_session=sagemaker_session, ) cdef = model.prepare_container_def(INSTANCE_TYPE) @@ -97,7 +95,6 @@ def test_tfs_model_accelerator(create_image_uri, sagemaker_session, tf_version): "s3://some/data.tar.gz", role=ROLE, framework_version=tf_version, - py_version=PY_VERSION, sagemaker_session=sagemaker_session, ) cdef = model.prepare_container_def(INSTANCE_TYPE, accelerator_type=ACCELERATOR_TYPE) @@ -115,7 +112,6 @@ def test_tfs_model_image_accelerator_not_supported(sagemaker_session): "s3://some/data.tar.gz", role=ROLE, framework_version="1.13.1", - py_version=PY_VERSION, sagemaker_session=sagemaker_session, ) @@ -129,7 +125,6 @@ def test_tfs_model_image_accelerator_not_supported(sagemaker_session): "s3://some/data.tar.gz", role=ROLE, framework_version="2.1", - py_version=PY_VERSION, sagemaker_session=sagemaker_session, ) @@ -152,7 +147,6 @@ def test_tfs_model_with_log_level(sagemaker_session, tf_version): "s3://some/data.tar.gz", role=ROLE, framework_version=tf_version, - py_version=PY_VERSION, container_log_level=logging.INFO, sagemaker_session=sagemaker_session, ) From 7ee405a8a4731744657aa794848809468d10bf84 Mon Sep 17 00:00:00 2001 From: Eric Johnson <65414824+metrizable@users.noreply.github.com> Date: Fri, 12 Jun 2020 15:38:46 -0700 Subject: [PATCH 5/5] fix: updates based on pr feedback --- tests/conftest.py | 5 ++-- tests/integ/test_tuner.py | 58 --------------------------------------- 2 files changed, 3 insertions(+), 60 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 7bfca7cd83..b68f40bd4e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -209,6 +209,7 @@ def xgboost_version(request): "1.11.0", "1.12", "1.12.0", + "1.13", "1.14", "1.14.0", "1.15", @@ -228,7 +229,7 @@ def tf_version(request): @pytest.fixture(scope="module", params=["py2", "py3"]) def tf_py_version(tf_version, request): version = [int(val) for val in tf_version.split(".")] - if version < [1, 13]: + if version < [1, 11]: return "py2" if version < [2, 2]: return request.param @@ -309,7 +310,7 @@ def tf_full_py_version(tf_full_version, request): TODO: Evaluate use of --tf-full-version with possible eye to remove and simplify code. """ version = [int(val) for val in tf_full_version.split(".")] - if version < [1, 13]: + if version < [1, 11]: return "py2" if tf_full_version in [TensorFlow._LATEST_1X_VERSION, LATEST_VERSION]: return "py37" diff --git a/tests/integ/test_tuner.py b/tests/integ/test_tuner.py index 199b695298..33a63c19bc 100644 --- a/tests/integ/test_tuner.py +++ b/tests/integ/test_tuner.py @@ -633,64 +633,6 @@ def test_tuning_tf_script_mode( tuner.wait() -# TODO: evaluate skip mark and default framework_version 1.11 -@pytest.mark.canary_quick -@pytest.mark.skipif(PYTHON_VERSION != "py2", reason="TensorFlow image supports only python 2.") -def test_tuning_tf(sagemaker_session, cpu_instance_type): - with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES): - script_path = os.path.join(DATA_DIR, "iris", "iris-dnn-classifier.py") - - estimator = TensorFlow( - entry_point=script_path, - role="SageMakerRole", - training_steps=1, - evaluation_steps=1, - hyperparameters={"input_tensor_name": "inputs"}, - train_instance_count=1, - train_instance_type=cpu_instance_type, - sagemaker_session=sagemaker_session, - framework_version="1.11", - py_version=PYTHON_VERSION, - ) - - inputs = sagemaker_session.upload_data(path=DATA_PATH, key_prefix="integ-test-data/tf_iris") - hyperparameter_ranges = {"learning_rate": ContinuousParameter(0.05, 0.2)} - - objective_metric_name = "loss" - metric_definitions = [{"Name": "loss", "Regex": "loss = ([0-9\\.]+)"}] - - tuner = HyperparameterTuner( - estimator, - objective_metric_name, - hyperparameter_ranges, - metric_definitions, - objective_type="Minimize", - max_jobs=2, - max_parallel_jobs=2, - ) - - tuning_job_name = unique_name_from_base("tune-tf", max_length=32) - tuner.fit(inputs, job_name=tuning_job_name) - - print("Started hyperparameter tuning job with name:" + tuning_job_name) - - time.sleep(15) - tuner.wait() - - best_training_job = tuner.best_training_job() - with timeout_and_delete_endpoint_by_name(best_training_job, sagemaker_session): - predictor = tuner.deploy(1, cpu_instance_type) - - features = [6.4, 3.2, 4.5, 1.5] - dict_result = predictor.predict({"inputs": features}) - print("predict result: {}".format(dict_result)) - list_result = predictor.predict(features) - print("predict result: {}".format(list_result)) - - assert dict_result == list_result - - -# TODO: evaluate skip mark and default framework_version 1.11 @pytest.mark.skipif(PYTHON_VERSION != "py2", reason="TensorFlow image supports only python 2.") def test_tuning_tf_vpc_multi(sagemaker_session, cpu_instance_type): """Test Tensorflow multi-instance using the same VpcConfig for training and inference"""