Skip to content

breaking: require framework_version, py_version for sklearn #1576

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 11, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions doc/frameworks/sklearn/using_sklearn.rst
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ To train a Scikit-learn model by using the SageMaker Python SDK:
Prepare a Scikit-learn Training Script
======================================

Your Scikit-learn training script must be a Python 2.7 or 3.6 compatible source file.
Your Scikit-learn training script must be a Python 3.6 compatible source file.

The training script is similar to a training script you might run outside of SageMaker, but you
can access useful properties about the training environment through various environment variables.
Expand Down Expand Up @@ -465,8 +465,10 @@ The following code sample shows how to do this, using the ``SKLearnModel`` class

.. code:: python

sklearn_model = SKLearnModel(model_data="s3://bucket/model.tar.gz", role="SageMakerRole",
entry_point="transform_script.py")
sklearn_model = SKLearnModel(model_data="s3://bucket/model.tar.gz",
role="SageMakerRole",
entry_point="transform_script.py",
framework_version="0.20.0")

predictor = sklearn_model.deploy(instance_type="ml.c4.xlarge", initial_instance_count=1)

Expand Down
74 changes: 45 additions & 29 deletions src/sagemaker/sklearn/estimator.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@
from sagemaker.fw_registry import default_framework_uri
from sagemaker.fw_utils import (
framework_name_from_image,
empty_framework_version_warning,
python_deprecation_warning,
framework_version_from_tag,
validate_version_or_image_args,
)
from sagemaker.sklearn import defaults
from sagemaker.sklearn.model import SKLearnModel
Expand All @@ -37,10 +37,10 @@ class SKLearn(Framework):
def __init__(
self,
entry_point,
framework_version=defaults.SKLEARN_VERSION,
framework_version=None,
py_version="py3",
source_dir=None,
hyperparameters=None,
py_version="py3",
image_name=None,
**kwargs
):
Expand Down Expand Up @@ -68,8 +68,13 @@ def __init__(
If ``source_dir`` is specified, then ``entry_point``
must point to a file located at the root of ``source_dir``.
framework_version (str): Scikit-learn version you want to use for
executing your model training code. List of supported versions
executing your model training code. Defaults to ``None``. Required
unless ``image_name`` is provided. List of supported versions:
https://github.com/aws/sagemaker-python-sdk#sklearn-sagemaker-estimators
py_version (str): Python version you want to use for executing your
model training code (default: 'py3'). Currently, 'py3' is the only
supported version. If ``None`` is passed in, ``image_name`` must be
provided.
source_dir (str): Path (absolute, relative or an S3 URI) to a directory
with any other training source code dependencies aside from the entry
point file (default: None). If ``source_dir`` is an S3 URI, it must
Expand All @@ -81,15 +86,18 @@ def __init__(
SageMaker. For convenience, this accepts other types for keys
and values, but ``str()`` will be called to convert them before
training.
py_version (str): Python version you want to use for executing your
model training code (default: 'py3'). One of 'py2' or 'py3'.
image_name (str): If specified, the estimator will use this image
for training and hosting, instead of selecting the appropriate
SageMaker official image based on framework_version and
py_version. It can be an ECR url or dockerhub image and tag.

Examples:
123.dkr.ecr.us-west-2.amazonaws.com/my-custom-image:1.0
custom-image:latest.

If ``framework_version`` or ``py_version`` are ``None``, then
``image_name`` is required. If also ``None``, then a ``ValueError``
will be raised.
**kwargs: Additional kwargs passed to the
:class:`~sagemaker.estimator.Framework` constructor.

Expand All @@ -99,6 +107,14 @@ def __init__(
:class:`~sagemaker.estimator.Framework` and
:class:`~sagemaker.estimator.EstimatorBase`.
"""
validate_version_or_image_args(framework_version, py_version, image_name)
if py_version and py_version != "py3":
raise AttributeError(
"Scikit-learn image only supports Python 3. Please use 'py3' for py_version."
)
self.framework_version = framework_version
self.py_version = py_version

# SciKit-Learn does not support distributed training or training on GPU instance types.
# Fail fast.
train_instance_type = kwargs.get("train_instance_type")
Expand All @@ -112,6 +128,7 @@ def __init__(
"Please remove the 'train_instance_count' argument or set "
"'train_instance_count=1' when initializing SKLearn."
)

super(SKLearn, self).__init__(
entry_point,
source_dir,
Expand All @@ -120,19 +137,6 @@ def __init__(
**dict(kwargs, train_instance_count=1)
)

if py_version == "py2":
logger.warning(
python_deprecation_warning(self.__framework_name__, defaults.LATEST_PY2_VERSION)
)

self.py_version = py_version

if framework_version is None:
logger.warning(
empty_framework_version_warning(defaults.SKLEARN_VERSION, defaults.SKLEARN_VERSION)
)
self.framework_version = framework_version or defaults.SKLEARN_VERSION

if image_name is None:
image_tag = "{}-{}-{}".format(framework_version, "cpu", py_version)
self.image_name = default_framework_uri(
Expand Down Expand Up @@ -216,28 +220,40 @@ class constructor
Args:
job_details: the returned job details from a describe_training_job
API call.
model_channel_name:
model_channel_name (str): Name of the channel where pre-trained
model data will be downloaded (default: None).

Returns:
dictionary: The transformed init_params
"""
init_params = super(SKLearn, cls)._prepare_init_params_from_job_description(job_details)

init_params = super(SKLearn, cls)._prepare_init_params_from_job_description(
job_details, model_channel_name
)
image_name = init_params.pop("image")
framework, py_version, _, _ = framework_name_from_image(image_name)
framework, py_version, tag, _ = framework_name_from_image(image_name)

if tag is None:
framework_version = None
else:
framework_version = framework_version_from_tag(tag)
init_params["framework_version"] = framework_version
init_params["py_version"] = py_version

if not framework:
# If we were unable to parse the framework name from the image it is not one of our
# officially supported images, in this case just add the image to the init params.
init_params["image_name"] = image_name
return init_params

training_job_name = init_params["base_job_name"]

if framework and framework != cls.__framework_name__:
training_job_name = init_params["base_job_name"]
raise ValueError(
"Training job: {} didn't use image for requested framework".format(
training_job_name
)
)
if not framework:
# If we were unable to parse the framework name from the image it is not one of our
# officially supported images, in this case just add the image to the init params.
init_params["image_name"] = image_name

return init_params


Expand Down
36 changes: 22 additions & 14 deletions src/sagemaker/sklearn/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@
from sagemaker import fw_utils

import sagemaker
from sagemaker.fw_utils import model_code_key_prefix, python_deprecation_warning
from sagemaker.fw_registry import default_framework_uri
from sagemaker.fw_utils import model_code_key_prefix, validate_version_or_image_args
from sagemaker.model import FrameworkModel, MODEL_SERVER_WORKERS_PARAM_NAME
from sagemaker.predictor import RealTimePredictor, npy_serializer, numpy_deserializer
from sagemaker.sklearn import defaults
Expand Down Expand Up @@ -62,9 +62,9 @@ def __init__(
model_data,
role,
entry_point,
image=None,
framework_version=None,
py_version="py3",
framework_version=defaults.SKLEARN_VERSION,
image=None,
predictor_cls=SKLearnPredictor,
model_server_workers=None,
**kwargs
Expand All @@ -83,12 +83,19 @@ def __init__(
file which should be executed as the entry point to model
hosting. If ``source_dir`` is specified, then ``entry_point``
must point to a file located at the root of ``source_dir``.
framework_version (str): Scikit-learn version you want to use for
executing your model training code. Defaults to ``None``. Required
unless ``image`` is provided.
py_version (str): Python version you want to use for executing your
model training code (default: 'py3'). Currently, 'py3' is the only
supported version. If ``None`` is passed in, ``image`` must be
provided.
image (str): A Docker image URI (default: None). If not specified, a
default image for Scikit-learn will be used.
py_version (str): Python version you want to use for executing your
model training code (default: 'py3').
framework_version (str): Scikit-learn version you want to use for
executing your model training code.

If ``framework_version`` or ``py_version`` are ``None``, then
``image`` is required. If also ``None``, then a ``ValueError``
will be raised.
predictor_cls (callable[str, sagemaker.session.Session]): A function
to call to create a predictor with an endpoint name and
SageMaker ``Session``. If specified, ``deploy()`` returns the
Expand All @@ -105,17 +112,18 @@ def __init__(
:class:`~sagemaker.model.FrameworkModel` and
:class:`~sagemaker.model.Model`.
"""
validate_version_or_image_args(framework_version, py_version, image)
if py_version and py_version != "py3":
raise AttributeError(
"Scikit-learn image only supports Python 3. Please use 'py3' for py_version."
)
self.framework_version = framework_version
self.py_version = py_version

super(SKLearnModel, self).__init__(
model_data, image, role, entry_point, predictor_cls=predictor_cls, **kwargs
)

if py_version == "py2":
logger.warning(
python_deprecation_warning(self.__framework_name__, defaults.LATEST_PY2_VERSION)
)

self.py_version = py_version
self.framework_version = framework_version
self.model_server_workers = model_server_workers

def prepare_container_def(self, instance_type, accelerator_type=None):
Expand Down
1 change: 1 addition & 0 deletions tests/integ/test_git.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,7 @@ def test_private_github_with_2fa(sagemaker_local_session, sklearn_full_version):
model_data,
"SageMakerRole",
entry_point=script_path,
framework_version=sklearn_full_version,
source_dir=source_dir,
sagemaker_session=sagemaker_local_session,
git_config=git_config,
Expand Down
9 changes: 6 additions & 3 deletions tests/integ/test_sklearn_train.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def sklearn_training_job(sagemaker_session, sklearn_full_version, cpu_instance_t
sagemaker_session.boto_region_name


@pytest.mark.skipif(PYTHON_VERSION != "py3", reason="Scikit-learn image supports only python 3.")
@pytest.mark.skipif(PYTHON_VERSION != "py3", reason="Scikit-learn image supports only Python 3.")
def test_training_with_additional_hyperparameters(
sagemaker_session, sklearn_full_version, cpu_instance_type
):
Expand Down Expand Up @@ -66,7 +66,7 @@ def test_training_with_additional_hyperparameters(
return sklearn.latest_training_job.name


@pytest.mark.skipif(PYTHON_VERSION != "py3", reason="Scikit-learn image supports only python 3.")
@pytest.mark.skipif(PYTHON_VERSION != "py3", reason="Scikit-learn image supports only Python 3.")
def test_training_with_network_isolation(
sagemaker_session, sklearn_full_version, cpu_instance_type
):
Expand Down Expand Up @@ -121,7 +121,9 @@ def test_attach_deploy(sklearn_training_job, sagemaker_session, cpu_instance_typ
reason="This test has always failed, but the failure was masked by a bug. "
"This test should be fixed. Details in https://github.com/aws/sagemaker-python-sdk/pull/968"
)
def test_deploy_model(sklearn_training_job, sagemaker_session, cpu_instance_type):
def test_deploy_model(
sklearn_training_job, sagemaker_session, cpu_instance_type, sklearn_full_version
):
endpoint_name = "test-sklearn-deploy-model-{}".format(sagemaker_timestamp())
with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
desc = sagemaker_session.sagemaker_client.describe_training_job(
Expand All @@ -133,6 +135,7 @@ def test_deploy_model(sklearn_training_job, sagemaker_session, cpu_instance_type
model_data,
"SageMakerRole",
entry_point=script_path,
framework_version=sklearn_full_version,
sagemaker_session=sagemaker_session,
)
predictor = model.deploy(1, cpu_instance_type, endpoint_name=endpoint_name)
Expand Down
Loading