diff --git a/doc/frameworks/sklearn/using_sklearn.rst b/doc/frameworks/sklearn/using_sklearn.rst index c49ffb0947..bea84a0b1d 100644 --- a/doc/frameworks/sklearn/using_sklearn.rst +++ b/doc/frameworks/sklearn/using_sklearn.rst @@ -31,7 +31,7 @@ To train a Scikit-learn model by using the SageMaker Python SDK: Prepare a Scikit-learn Training Script ====================================== -Your Scikit-learn training script must be a Python 2.7 or 3.6 compatible source file. +Your Scikit-learn training script must be a Python 3.6 compatible source file. The training script is similar to a training script you might run outside of SageMaker, but you can access useful properties about the training environment through various environment variables. @@ -465,8 +465,10 @@ The following code sample shows how to do this, using the ``SKLearnModel`` class .. code:: python - sklearn_model = SKLearnModel(model_data="s3://bucket/model.tar.gz", role="SageMakerRole", - entry_point="transform_script.py") + sklearn_model = SKLearnModel(model_data="s3://bucket/model.tar.gz", + role="SageMakerRole", + entry_point="transform_script.py", + framework_version="0.20.0") predictor = sklearn_model.deploy(instance_type="ml.c4.xlarge", initial_instance_count=1) diff --git a/src/sagemaker/sklearn/estimator.py b/src/sagemaker/sklearn/estimator.py index e0fc4f76b9..5f6307c56e 100644 --- a/src/sagemaker/sklearn/estimator.py +++ b/src/sagemaker/sklearn/estimator.py @@ -19,8 +19,8 @@ from sagemaker.fw_registry import default_framework_uri from sagemaker.fw_utils import ( framework_name_from_image, - empty_framework_version_warning, - python_deprecation_warning, + framework_version_from_tag, + validate_version_or_image_args, ) from sagemaker.sklearn import defaults from sagemaker.sklearn.model import SKLearnModel @@ -37,10 +37,10 @@ class SKLearn(Framework): def __init__( self, entry_point, - framework_version=defaults.SKLEARN_VERSION, + framework_version=None, + py_version="py3", source_dir=None, hyperparameters=None, - py_version="py3", image_name=None, **kwargs ): @@ -68,8 +68,13 @@ def __init__( If ``source_dir`` is specified, then ``entry_point`` must point to a file located at the root of ``source_dir``. framework_version (str): Scikit-learn version you want to use for - executing your model training code. List of supported versions + executing your model training code. Defaults to ``None``. Required + unless ``image_name`` is provided. List of supported versions: https://github.com/aws/sagemaker-python-sdk#sklearn-sagemaker-estimators + py_version (str): Python version you want to use for executing your + model training code (default: 'py3'). Currently, 'py3' is the only + supported version. If ``None`` is passed in, ``image_name`` must be + provided. source_dir (str): Path (absolute, relative or an S3 URI) to a directory with any other training source code dependencies aside from the entry point file (default: None). If ``source_dir`` is an S3 URI, it must @@ -81,15 +86,18 @@ def __init__( SageMaker. For convenience, this accepts other types for keys and values, but ``str()`` will be called to convert them before training. - py_version (str): Python version you want to use for executing your - model training code (default: 'py3'). One of 'py2' or 'py3'. image_name (str): If specified, the estimator will use this image for training and hosting, instead of selecting the appropriate SageMaker official image based on framework_version and py_version. It can be an ECR url or dockerhub image and tag. + Examples: 123.dkr.ecr.us-west-2.amazonaws.com/my-custom-image:1.0 custom-image:latest. + + If ``framework_version`` or ``py_version`` are ``None``, then + ``image_name`` is required. If also ``None``, then a ``ValueError`` + will be raised. **kwargs: Additional kwargs passed to the :class:`~sagemaker.estimator.Framework` constructor. @@ -99,6 +107,14 @@ def __init__( :class:`~sagemaker.estimator.Framework` and :class:`~sagemaker.estimator.EstimatorBase`. """ + validate_version_or_image_args(framework_version, py_version, image_name) + if py_version and py_version != "py3": + raise AttributeError( + "Scikit-learn image only supports Python 3. Please use 'py3' for py_version." + ) + self.framework_version = framework_version + self.py_version = py_version + # SciKit-Learn does not support distributed training or training on GPU instance types. # Fail fast. train_instance_type = kwargs.get("train_instance_type") @@ -112,6 +128,7 @@ def __init__( "Please remove the 'train_instance_count' argument or set " "'train_instance_count=1' when initializing SKLearn." ) + super(SKLearn, self).__init__( entry_point, source_dir, @@ -120,19 +137,6 @@ def __init__( **dict(kwargs, train_instance_count=1) ) - if py_version == "py2": - logger.warning( - python_deprecation_warning(self.__framework_name__, defaults.LATEST_PY2_VERSION) - ) - - self.py_version = py_version - - if framework_version is None: - logger.warning( - empty_framework_version_warning(defaults.SKLEARN_VERSION, defaults.SKLEARN_VERSION) - ) - self.framework_version = framework_version or defaults.SKLEARN_VERSION - if image_name is None: image_tag = "{}-{}-{}".format(framework_version, "cpu", py_version) self.image_name = default_framework_uri( @@ -216,28 +220,40 @@ class constructor Args: job_details: the returned job details from a describe_training_job API call. - model_channel_name: + model_channel_name (str): Name of the channel where pre-trained + model data will be downloaded (default: None). Returns: dictionary: The transformed init_params """ - init_params = super(SKLearn, cls)._prepare_init_params_from_job_description(job_details) - + init_params = super(SKLearn, cls)._prepare_init_params_from_job_description( + job_details, model_channel_name + ) image_name = init_params.pop("image") - framework, py_version, _, _ = framework_name_from_image(image_name) + framework, py_version, tag, _ = framework_name_from_image(image_name) + + if tag is None: + framework_version = None + else: + framework_version = framework_version_from_tag(tag) + init_params["framework_version"] = framework_version init_params["py_version"] = py_version + if not framework: + # If we were unable to parse the framework name from the image it is not one of our + # officially supported images, in this case just add the image to the init params. + init_params["image_name"] = image_name + return init_params + + training_job_name = init_params["base_job_name"] + if framework and framework != cls.__framework_name__: - training_job_name = init_params["base_job_name"] raise ValueError( "Training job: {} didn't use image for requested framework".format( training_job_name ) ) - if not framework: - # If we were unable to parse the framework name from the image it is not one of our - # officially supported images, in this case just add the image to the init params. - init_params["image_name"] = image_name + return init_params diff --git a/src/sagemaker/sklearn/model.py b/src/sagemaker/sklearn/model.py index fce3d3e304..fc16386e1e 100644 --- a/src/sagemaker/sklearn/model.py +++ b/src/sagemaker/sklearn/model.py @@ -18,8 +18,8 @@ from sagemaker import fw_utils import sagemaker -from sagemaker.fw_utils import model_code_key_prefix, python_deprecation_warning from sagemaker.fw_registry import default_framework_uri +from sagemaker.fw_utils import model_code_key_prefix, validate_version_or_image_args from sagemaker.model import FrameworkModel, MODEL_SERVER_WORKERS_PARAM_NAME from sagemaker.predictor import RealTimePredictor, npy_serializer, numpy_deserializer from sagemaker.sklearn import defaults @@ -62,9 +62,9 @@ def __init__( model_data, role, entry_point, - image=None, + framework_version=None, py_version="py3", - framework_version=defaults.SKLEARN_VERSION, + image=None, predictor_cls=SKLearnPredictor, model_server_workers=None, **kwargs @@ -83,12 +83,19 @@ def __init__( file which should be executed as the entry point to model hosting. If ``source_dir`` is specified, then ``entry_point`` must point to a file located at the root of ``source_dir``. + framework_version (str): Scikit-learn version you want to use for + executing your model training code. Defaults to ``None``. Required + unless ``image`` is provided. + py_version (str): Python version you want to use for executing your + model training code (default: 'py3'). Currently, 'py3' is the only + supported version. If ``None`` is passed in, ``image`` must be + provided. image (str): A Docker image URI (default: None). If not specified, a default image for Scikit-learn will be used. - py_version (str): Python version you want to use for executing your - model training code (default: 'py3'). - framework_version (str): Scikit-learn version you want to use for - executing your model training code. + + If ``framework_version`` or ``py_version`` are ``None``, then + ``image`` is required. If also ``None``, then a ``ValueError`` + will be raised. predictor_cls (callable[str, sagemaker.session.Session]): A function to call to create a predictor with an endpoint name and SageMaker ``Session``. If specified, ``deploy()`` returns the @@ -105,17 +112,18 @@ def __init__( :class:`~sagemaker.model.FrameworkModel` and :class:`~sagemaker.model.Model`. """ + validate_version_or_image_args(framework_version, py_version, image) + if py_version and py_version != "py3": + raise AttributeError( + "Scikit-learn image only supports Python 3. Please use 'py3' for py_version." + ) + self.framework_version = framework_version + self.py_version = py_version + super(SKLearnModel, self).__init__( model_data, image, role, entry_point, predictor_cls=predictor_cls, **kwargs ) - if py_version == "py2": - logger.warning( - python_deprecation_warning(self.__framework_name__, defaults.LATEST_PY2_VERSION) - ) - - self.py_version = py_version - self.framework_version = framework_version self.model_server_workers = model_server_workers def prepare_container_def(self, instance_type, accelerator_type=None): diff --git a/tests/integ/test_git.py b/tests/integ/test_git.py index 788d8c68dd..e311924723 100644 --- a/tests/integ/test_git.py +++ b/tests/integ/test_git.py @@ -173,6 +173,7 @@ def test_private_github_with_2fa(sagemaker_local_session, sklearn_full_version): model_data, "SageMakerRole", entry_point=script_path, + framework_version=sklearn_full_version, source_dir=source_dir, sagemaker_session=sagemaker_local_session, git_config=git_config, diff --git a/tests/integ/test_sklearn_train.py b/tests/integ/test_sklearn_train.py index 6bad25d649..e22ecfa01a 100644 --- a/tests/integ/test_sklearn_train.py +++ b/tests/integ/test_sklearn_train.py @@ -36,7 +36,7 @@ def sklearn_training_job(sagemaker_session, sklearn_full_version, cpu_instance_t sagemaker_session.boto_region_name -@pytest.mark.skipif(PYTHON_VERSION != "py3", reason="Scikit-learn image supports only python 3.") +@pytest.mark.skipif(PYTHON_VERSION != "py3", reason="Scikit-learn image supports only Python 3.") def test_training_with_additional_hyperparameters( sagemaker_session, sklearn_full_version, cpu_instance_type ): @@ -66,7 +66,7 @@ def test_training_with_additional_hyperparameters( return sklearn.latest_training_job.name -@pytest.mark.skipif(PYTHON_VERSION != "py3", reason="Scikit-learn image supports only python 3.") +@pytest.mark.skipif(PYTHON_VERSION != "py3", reason="Scikit-learn image supports only Python 3.") def test_training_with_network_isolation( sagemaker_session, sklearn_full_version, cpu_instance_type ): @@ -121,7 +121,9 @@ def test_attach_deploy(sklearn_training_job, sagemaker_session, cpu_instance_typ reason="This test has always failed, but the failure was masked by a bug. " "This test should be fixed. Details in https://github.com/aws/sagemaker-python-sdk/pull/968" ) -def test_deploy_model(sklearn_training_job, sagemaker_session, cpu_instance_type): +def test_deploy_model( + sklearn_training_job, sagemaker_session, cpu_instance_type, sklearn_full_version +): endpoint_name = "test-sklearn-deploy-model-{}".format(sagemaker_timestamp()) with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session): desc = sagemaker_session.sagemaker_client.describe_training_job( @@ -133,6 +135,7 @@ def test_deploy_model(sklearn_training_job, sagemaker_session, cpu_instance_type model_data, "SageMakerRole", entry_point=script_path, + framework_version=sklearn_full_version, sagemaker_session=sagemaker_session, ) predictor = model.deploy(1, cpu_instance_type, endpoint_name=endpoint_name) diff --git a/tests/unit/test_sklearn.py b/tests/unit/test_sklearn.py index 3acba6b8ee..94d56e8b93 100644 --- a/tests/unit/test_sklearn.py +++ b/tests/unit/test_sklearn.py @@ -167,7 +167,7 @@ def test_train_image(sagemaker_session, sklearn_version): ) -def test_create_model(sagemaker_session): +def test_create_model(sagemaker_session, sklearn_version): source_dir = "s3://mybucket/source" sklearn_model = SKLearnModel( @@ -175,14 +175,15 @@ def test_create_model(sagemaker_session): role=ROLE, sagemaker_session=sagemaker_session, entry_point=SCRIPT_PATH, + framework_version=sklearn_version, ) - default_image_uri = _get_full_cpu_image_uri("0.20.0") + image_uri = _get_full_cpu_image_uri(sklearn_version) model_values = sklearn_model.prepare_container_def(CPU) - assert model_values["Image"] == default_image_uri + assert model_values["Image"] == image_uri @patch("sagemaker.model.FrameworkModel._upload_code") -def test_create_model_with_network_isolation(upload, sagemaker_session): +def test_create_model_with_network_isolation(upload, sagemaker_session, sklearn_version): source_dir = "s3://mybucket/source" repacked_model_data = "s3://mybucket/prefix/model.tar.gz" @@ -192,6 +193,7 @@ def test_create_model_with_network_isolation(upload, sagemaker_session): sagemaker_session=sagemaker_session, entry_point=SCRIPT_PATH, enable_network_isolation=True, + framework_version=sklearn_version, ) sklearn_model.uploaded_code = UploadedCode(s3_prefix=repacked_model_data, script_name="script") sklearn_model.repacked_model_data = repacked_model_data @@ -232,7 +234,7 @@ def test_create_model_from_estimator(sagemaker_session, sklearn_version): assert model.enable_network_isolation() -def test_create_model_with_optional_params(sagemaker_session): +def test_create_model_with_optional_params(sagemaker_session, sklearn_version): container_log_level = '"logging.INFO"' source_dir = "s3://mybucket/source" enable_cloudwatch_metrics = "true" @@ -242,6 +244,7 @@ def test_create_model_with_optional_params(sagemaker_session): sagemaker_session=sagemaker_session, train_instance_type=INSTANCE_TYPE, container_log_level=container_log_level, + framework_version=sklearn_version, py_version=PYTHON_VERSION, base_job_name="job", source_dir=source_dir, @@ -398,24 +401,26 @@ def test_fail_GPU_training(sagemaker_session, sklearn_version): assert "GPU training in not supported for Scikit-Learn." in str(error) -def test_model(sagemaker_session): +def test_model(sagemaker_session, sklearn_version): model = SKLearnModel( "s3://some/data.tar.gz", role=ROLE, entry_point=SCRIPT_PATH, + framework_version=sklearn_version, sagemaker_session=sagemaker_session, ) predictor = model.deploy(1, CPU) assert isinstance(predictor, SKLearnPredictor) -def test_train_image_default(sagemaker_session): +def test_train_image_default(sagemaker_session, sklearn_version): sklearn = SKLearn( entry_point=SCRIPT_PATH, + framework_version=sklearn_version, + py_version=PYTHON_VERSION, role=ROLE, sagemaker_session=sagemaker_session, train_instance_type=INSTANCE_TYPE, - py_version=PYTHON_VERSION, ) assert _get_full_cpu_image_uri(defaults.SKLEARN_VERSION) in sklearn.train_image() @@ -559,34 +564,31 @@ def test_attach_custom_image(sagemaker_session): assert estimator.train_image() == training_image -@patch("sagemaker.sklearn.estimator.python_deprecation_warning") -def test_estimator_py2_warning(warning, sagemaker_session): - estimator = SKLearn( - entry_point=SCRIPT_PATH, - role=ROLE, - sagemaker_session=sagemaker_session, - train_instance_count=INSTANCE_COUNT, - train_instance_type=INSTANCE_TYPE, - py_version="py2", - ) - - assert estimator.py_version == "py2" - warning.assert_called_with(estimator.__framework_name__, defaults.LATEST_PY2_VERSION) +def test_estimator_py2_raises(sagemaker_session, sklearn_version): + with pytest.raises(AttributeError): + SKLearn( + entry_point=SCRIPT_PATH, + role=ROLE, + sagemaker_session=sagemaker_session, + train_instance_count=INSTANCE_COUNT, + train_instance_type=INSTANCE_TYPE, + framework_version=sklearn_version, + py_version="py2", + ) -@patch("sagemaker.sklearn.model.python_deprecation_warning") -def test_model_py2_warning(warning, sagemaker_session): +def test_model_py2_raises(sagemaker_session, sklearn_version): source_dir = "s3://mybucket/source" - model = SKLearnModel( - model_data=source_dir, - role=ROLE, - entry_point=SCRIPT_PATH, - sagemaker_session=sagemaker_session, - py_version="py2", - ) - assert model.py_version == "py2" - warning.assert_called_with(model.__framework_name__, defaults.LATEST_PY2_VERSION) + with pytest.raises(AttributeError): + SKLearnModel( + model_data=source_dir, + role=ROLE, + entry_point=SCRIPT_PATH, + sagemaker_session=sagemaker_session, + framework_version=sklearn_version, + py_version="py2", + ) def test_custom_image_estimator_deploy(sagemaker_session):