diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 42d3e593f9..99d80d2049 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -2,9 +2,10 @@ CHANGELOG ========= -1.15.1.dev -========== +1.15.1dev +========= +* feature: Estimators: dependencies attribute allows export of additional libraries into the container * feature: Add APIs to export Airflow transform and deploy config 1.15.0 diff --git a/src/sagemaker/chainer/README.rst b/src/sagemaker/chainer/README.rst index 5007131af6..edbfdbd997 100644 --- a/src/sagemaker/chainer/README.rst +++ b/src/sagemaker/chainer/README.rst @@ -149,6 +149,23 @@ The following are optional arguments. When you create a ``Chainer`` object, you other training source code dependencies including the entry point file. Structure within this directory will be preserved when training on SageMaker. +- ``dependencies (list[str])`` A list of paths to directories (absolute or relative) with + any additional libraries that will be exported to the container (default: []). + The library folders will be copied to SageMaker in the same folder where the entrypoint is copied. + If the ```source_dir``` points to S3, code will be uploaded and the S3 location will be used + instead. Example: + + The following call + >>> Chainer(entry_point='train.py', dependencies=['my/libs/common', 'virtual-env']) + results in the following inside the container: + + >>> $ ls + + >>> opt/ml/code + >>> ├── train.py + >>> ├── common + >>> └── virtual-env + - ``hyperparameters`` Hyperparameters that will be used for training. Will be made accessible as a dict[str, str] to the training code on SageMaker. 
For convenience, accepts other types besides str, but diff --git a/src/sagemaker/chainer/estimator.py b/src/sagemaker/chainer/estimator.py index 646fc4d0e7..baf716fb6a 100644 --- a/src/sagemaker/chainer/estimator.py +++ b/src/sagemaker/chainer/estimator.py @@ -133,7 +133,7 @@ def create_model(self, model_server_workers=None, role=None, vpc_config_override py_version=self.py_version, framework_version=self.framework_version, model_server_workers=model_server_workers, image=self.image_name, sagemaker_session=self.sagemaker_session, - vpc_config=self.get_vpc_config(vpc_config_override)) + vpc_config=self.get_vpc_config(vpc_config_override), dependencies=self.dependencies) @classmethod def _prepare_init_params_from_job_description(cls, job_details, model_channel_name=None): diff --git a/src/sagemaker/estimator.py b/src/sagemaker/estimator.py index d6cf2c195b..1981dd2790 100644 --- a/src/sagemaker/estimator.py +++ b/src/sagemaker/estimator.py @@ -637,7 +637,7 @@ class Framework(EstimatorBase): LAUNCH_PS_ENV_NAME = 'sagemaker_parameter_server_enabled' def __init__(self, entry_point, source_dir=None, hyperparameters=None, enable_cloudwatch_metrics=False, - container_log_level=logging.INFO, code_location=None, image_name=None, **kwargs): + container_log_level=logging.INFO, code_location=None, image_name=None, dependencies=None, **kwargs): """Base class initializer. Subclasses which override ``__init__`` should invoke ``super()`` Args: @@ -646,6 +646,22 @@ def __init__(self, entry_point, source_dir=None, hyperparameters=None, enable_cl source_dir (str): Path (absolute or relative) to a directory with any other training source code dependencies aside from tne entry point file (default: None). Structure within this directory are preserved when training on Amazon SageMaker. + dependencies (list[str]): A list of paths to directories (absolute or relative) with + any additional libraries that will be exported to the container (default: []). 
+ The library folders will be copied to SageMaker in the same folder where the entrypoint is copied. + Example: + + The following call + >>> Estimator(entry_point='train.py', dependencies=['my/libs/common', 'virtual-env']) + results in the following inside the container: + + >>> $ ls + + >>> opt/ml/code + >>> ├── train.py + >>> ├── common + >>> └── virtual-env + hyperparameters (dict): Hyperparameters that will be used for training (default: None). The hyperparameters are made accessible as a dict[str, str] to the training code on SageMaker. For convenience, this accepts other types for keys and values, but ``str()`` will be called @@ -663,6 +679,7 @@ def __init__(self, entry_point, source_dir=None, hyperparameters=None, enable_cl """ super(Framework, self).__init__(**kwargs) self.source_dir = source_dir + self.dependencies = dependencies or [] self.entry_point = entry_point if enable_cloudwatch_metrics: warnings.warn('enable_cloudwatch_metrics is now deprecated and will be removed in the future.', @@ -729,7 +746,8 @@ def _stage_user_code_in_s3(self): bucket=code_bucket, s3_key_prefix=code_s3_prefix, script=self.entry_point, - directory=self.source_dir) + directory=self.source_dir, + dependencies=self.dependencies) def _model_source_dir(self): """Get the appropriate value to pass as source_dir to model constructor on deploying diff --git a/src/sagemaker/fw_utils.py b/src/sagemaker/fw_utils.py index 2fc4ab814b..dde632c477 100644 --- a/src/sagemaker/fw_utils.py +++ b/src/sagemaker/fw_utils.py @@ -14,11 +14,15 @@ import os import re +import shutil +import tempfile from collections import namedtuple from six.moves.urllib.parse import urlparse import sagemaker.utils +_TAR_SOURCE_FILENAME = 'source.tar.gz' + UploadedCode = namedtuple('UserCode', ['s3_prefix', 'script_name']) """sagemaker.fw_utils.UserCode: An object containing the S3 prefix and script name. 
@@ -107,7 +111,7 @@ def validate_source_dir(script, directory): return True -def tar_and_upload_dir(session, bucket, s3_key_prefix, script, directory): +def tar_and_upload_dir(session, bucket, s3_key_prefix, script, directory, dependencies=None): """Pack and upload source files to S3 only if directory is empty or local. Note: @@ -118,31 +122,38 @@ def tar_and_upload_dir(session, bucket, s3_key_prefix, script, directory): bucket (str): S3 bucket to which the compressed file is uploaded. s3_key_prefix (str): Prefix for the S3 key. script (str): Script filename. - directory (str): Directory containing the source file. If it starts with "s3://", no action is taken. + directory (str or None): Directory containing the source file. If it starts with + "s3://", no action is taken. + dependencies (List[str]): A list of paths to directories (absolute or relative) + containing additional libraries that will be copied into + /opt/ml/lib Returns: sagemaker.fw_utils.UserCode: An object with the S3 bucket and key (S3 prefix) and script name. """ - if directory: - if directory.lower().startswith("s3://"): - return UploadedCode(s3_prefix=directory, script_name=os.path.basename(script)) - else: - script_name = script - source_files = [os.path.join(directory, name) for name in os.listdir(directory)] + dependencies = dependencies or [] + key = '%s/sourcedir.tar.gz' % s3_key_prefix + + if directory and directory.lower().startswith('s3://'): + return UploadedCode(s3_prefix=directory, script_name=os.path.basename(script)) else: - # If no directory is specified, the script parameter needs to be a valid relative path. 
- os.path.exists(script) - script_name = os.path.basename(script) - source_files = [script] + tmp = tempfile.mkdtemp() + + try: + source_files = _list_files_to_compress(script, directory) + dependencies + tar_file = sagemaker.utils.create_tar_file(source_files, os.path.join(tmp, _TAR_SOURCE_FILENAME)) + + session.resource('s3').Object(bucket, key).upload_file(tar_file) + finally: + shutil.rmtree(tmp) - s3 = session.resource('s3') - key = '{}/{}'.format(s3_key_prefix, 'sourcedir.tar.gz') + script_name = script if directory else os.path.basename(script) + return UploadedCode(s3_prefix='s3://%s/%s' % (bucket, key), script_name=script_name) - tar_file = sagemaker.utils.create_tar_file(source_files) - s3.Object(bucket, key).upload_file(tar_file) - os.remove(tar_file) - return UploadedCode(s3_prefix='s3://{}/{}'.format(bucket, key), script_name=script_name) +def _list_files_to_compress(script, directory): + basedir = directory if directory else os.path.dirname(script) + return [os.path.join(basedir, name) for name in os.listdir(basedir)] def framework_name_from_image(image_name): diff --git a/src/sagemaker/model.py b/src/sagemaker/model.py index 972fdb7961..06a85470d7 100644 --- a/src/sagemaker/model.py +++ b/src/sagemaker/model.py @@ -16,10 +16,10 @@ import sagemaker -from sagemaker.local import LocalSession -from sagemaker.fw_utils import tar_and_upload_dir, parse_s3_url, model_code_key_prefix -from sagemaker.session import Session -from sagemaker.utils import name_from_image, get_config_value +from sagemaker import local +from sagemaker import fw_utils +from sagemaker import session +from sagemaker import utils class Model(object): @@ -96,12 +96,12 @@ def deploy(self, initial_instance_count, instance_type, endpoint_name=None, tags """ if not self.sagemaker_session: if instance_type in ('local', 'local_gpu'): - self.sagemaker_session = LocalSession() + self.sagemaker_session = local.LocalSession() else: - self.sagemaker_session = Session() + self.sagemaker_session = 
session.Session() container_def = self.prepare_container_def(instance_type) - self.name = self.name or name_from_image(container_def['Image']) + self.name = self.name or utils.name_from_image(container_def['Image']) self.sagemaker_session.create_model(self.name, self.role, container_def, vpc_config=self.vpc_config) production_variant = sagemaker.production_variant(self.name, instance_type, initial_instance_count) self.endpoint_name = endpoint_name or self.name @@ -127,7 +127,7 @@ class FrameworkModel(Model): def __init__(self, model_data, image, role, entry_point, source_dir=None, predictor_cls=None, env=None, name=None, enable_cloudwatch_metrics=False, container_log_level=logging.INFO, code_location=None, - sagemaker_session=None, **kwargs): + sagemaker_session=None, dependencies=None, **kwargs): """Initialize a ``FrameworkModel``. Args: @@ -140,6 +140,23 @@ def __init__(self, model_data, image, role, entry_point, source_dir=None, predic source code dependencies aside from tne entry point file (default: None). Structure within this directory will be preserved when training on SageMaker. If the directory points to S3, no code will be uploaded and the S3 location will be used instead. + dependencies (list[str]): A list of paths to directories (absolute or relative) with + any additional libraries that will be exported to the container (default: []). + The library folders will be copied to SageMaker in the same folder where the entrypoint is copied. + If the ```source_dir``` points to S3, code will be uploaded and the S3 location will be used + instead. Example: + + The following call + >>> Estimator(entry_point='train.py', dependencies=['my/libs/common', 'virtual-env']) + results in the following inside the container: + + >>> $ ls + + >>> opt/ml/code + >>> ├── train.py + >>> ├── common + >>> └── virtual-env + predictor_cls (callable[string, sagemaker.session.Session]): A function to call to create a predictor (default: None). 
If not None, ``deploy`` will return the result of invoking this function on the created endpoint name. @@ -160,10 +177,11 @@ def __init__(self, model_data, image, role, entry_point, source_dir=None, predic sagemaker_session=sagemaker_session, **kwargs) self.entry_point = entry_point self.source_dir = source_dir + self.dependencies = dependencies or [] self.enable_cloudwatch_metrics = enable_cloudwatch_metrics self.container_log_level = container_log_level if code_location: - self.bucket, self.key_prefix = parse_s3_url(code_location) + self.bucket, self.key_prefix = fw_utils.parse_s3_url(code_location) else: self.bucket, self.key_prefix = None, None self.uploaded_code = None @@ -179,22 +197,24 @@ def prepare_container_def(self, instance_type): # pylint disable=unused-argumen Returns: dict[str, str]: A container definition object usable with the CreateModel API. """ - deploy_key_prefix = model_code_key_prefix(self.key_prefix, self.name, self.image) + deploy_key_prefix = fw_utils.model_code_key_prefix(self.key_prefix, self.name, self.image) self._upload_code(deploy_key_prefix) deploy_env = dict(self.env) deploy_env.update(self._framework_env_vars()) return sagemaker.container_def(self.image, self.model_data, deploy_env) def _upload_code(self, key_prefix): - local_code = get_config_value('local.local_code', self.sagemaker_session.config) + local_code = utils.get_config_value('local.local_code', self.sagemaker_session.config) if self.sagemaker_session.local_mode and local_code: self.uploaded_code = None else: - self.uploaded_code = tar_and_upload_dir(session=self.sagemaker_session.boto_session, - bucket=self.bucket or self.sagemaker_session.default_bucket(), - s3_key_prefix=key_prefix, - script=self.entry_point, - directory=self.source_dir) + bucket = self.bucket or self.sagemaker_session.default_bucket() + self.uploaded_code = fw_utils.tar_and_upload_dir(session=self.sagemaker_session.boto_session, + bucket=bucket, + s3_key_prefix=key_prefix, + script=self.entry_point, 
+ directory=self.source_dir, + dependencies=self.dependencies) def _framework_env_vars(self): if self.uploaded_code: diff --git a/src/sagemaker/mxnet/README.rst b/src/sagemaker/mxnet/README.rst index 930a03770d..1d5e200bac 100644 --- a/src/sagemaker/mxnet/README.rst +++ b/src/sagemaker/mxnet/README.rst @@ -271,6 +271,23 @@ The following are optional arguments. When you create an ``MXNet`` object, you c other training source code dependencies including the entry point file. Structure within this directory will be preserved when training on SageMaker. +- ``dependencies (list[str])`` A list of paths to directories (absolute or relative) with + any additional libraries that will be exported to the container (default: []). + The library folders will be copied to SageMaker in the same folder where the entrypoint is copied. + If the ```source_dir``` points to S3, code will be uploaded and the S3 location will be used + instead. Example: + + The following call + >>> MXNet(entry_point='train.py', dependencies=['my/libs/common', 'virtual-env']) + results in the following inside the container: + + >>> $ ls + + >>> opt/ml/code + >>> ├── train.py + >>> ├── common + >>> └── virtual-env + - ``hyperparameters`` Hyperparameters that will be used for training. Will be made accessible as a dict[str, str] to the training code on SageMaker. 
For convenience, accepts other types besides str, but diff --git a/src/sagemaker/mxnet/estimator.py b/src/sagemaker/mxnet/estimator.py index 10566702ff..a4c1c1fabd 100644 --- a/src/sagemaker/mxnet/estimator.py +++ b/src/sagemaker/mxnet/estimator.py @@ -115,7 +115,7 @@ def create_model(self, model_server_workers=None, role=None, vpc_config_override container_log_level=self.container_log_level, code_location=self.code_location, py_version=self.py_version, framework_version=self.framework_version, image=self.image_name, model_server_workers=model_server_workers, sagemaker_session=self.sagemaker_session, - vpc_config=self.get_vpc_config(vpc_config_override)) + vpc_config=self.get_vpc_config(vpc_config_override), dependencies=self.dependencies) @classmethod def _prepare_init_params_from_job_description(cls, job_details, model_channel_name=None): diff --git a/src/sagemaker/pytorch/README.rst b/src/sagemaker/pytorch/README.rst index de8c96d569..c9cca35448 100644 --- a/src/sagemaker/pytorch/README.rst +++ b/src/sagemaker/pytorch/README.rst @@ -175,6 +175,23 @@ The following are optional arguments. When you create a ``PyTorch`` object, you other training source code dependencies including the entry point file. Structure within this directory will be preserved when training on SageMaker. +- ``dependencies (list[str])`` A list of paths to directories (absolute or relative) with + any additional libraries that will be exported to the container (default: []). + The library folders will be copied to SageMaker in the same folder where the entrypoint is copied. + If the ```source_dir``` points to S3, code will be uploaded and the S3 location will be used + instead. 
Example: + + The following call + >>> PyTorch(entry_point='train.py', dependencies=['my/libs/common', 'virtual-env']) + results in the following inside the container: + + >>> $ ls + + >>> opt/ml/code + >>> ├── train.py + >>> ├── common + >>> └── virtual-env + - ``hyperparameters`` Hyperparameters that will be used for training. Will be made accessible as a dict[str, str] to the training code on SageMaker. For convenience, accepts other types besides strings, but diff --git a/src/sagemaker/pytorch/estimator.py b/src/sagemaker/pytorch/estimator.py index 2f75e73854..9dec3f1680 100644 --- a/src/sagemaker/pytorch/estimator.py +++ b/src/sagemaker/pytorch/estimator.py @@ -96,7 +96,7 @@ def create_model(self, model_server_workers=None, role=None, vpc_config_override container_log_level=self.container_log_level, code_location=self.code_location, py_version=self.py_version, framework_version=self.framework_version, image=self.image_name, model_server_workers=model_server_workers, sagemaker_session=self.sagemaker_session, - vpc_config=self.get_vpc_config(vpc_config_override)) + vpc_config=self.get_vpc_config(vpc_config_override), dependencies=self.dependencies) @classmethod def _prepare_init_params_from_job_description(cls, job_details, model_channel_name=None): diff --git a/src/sagemaker/tensorflow/README.rst b/src/sagemaker/tensorflow/README.rst index 919bd7ed37..6fea7a10d2 100644 --- a/src/sagemaker/tensorflow/README.rst +++ b/src/sagemaker/tensorflow/README.rst @@ -409,6 +409,23 @@ you can specify these as keyword arguments. other training source code dependencies including the entry point file. Structure within this directory will be preserved when training on SageMaker. +- ``dependencies (list[str])`` A list of paths to directories (absolute or relative) with + any additional libraries that will be exported to the container (default: []). + The library folders will be copied to SageMaker in the same folder where the entrypoint is copied. 
+ If the ```source_dir``` points to S3, code will be uploaded and the S3 location will be used + instead. Example: + + The following call + >>> TensorFlow(entry_point='train.py', dependencies=['my/libs/common', 'virtual-env']) + results in the following inside the container: + + >>> $ ls + + >>> opt/ml/code + >>> ├── train.py + >>> ├── common + >>> └── virtual-env + - ``requirements_file (str)`` Path to a ``requirements.txt`` file. The path should be within and relative to ``source_dir``. This is a file containing a list of items to be installed using pip install. Details on the format can be found in the diff --git a/src/sagemaker/tensorflow/estimator.py b/src/sagemaker/tensorflow/estimator.py index 87021871ae..34d5d2bcb4 100644 --- a/src/sagemaker/tensorflow/estimator.py +++ b/src/sagemaker/tensorflow/estimator.py @@ -411,7 +411,8 @@ def _create_default_model(self, model_server_workers, role, vpc_config_override) framework_version=self.framework_version, model_server_workers=model_server_workers, sagemaker_session=self.sagemaker_session, - vpc_config=self.get_vpc_config(vpc_config_override)) + vpc_config=self.get_vpc_config(vpc_config_override), + dependencies=self.dependencies) def hyperparameters(self): """Return hyperparameters used by your custom TensorFlow code during model training.""" diff --git a/tests/data/pytorch_source_dirs/train.py b/tests/data/pytorch_source_dirs/train.py new file mode 100644 index 0000000000..e1f0667015 --- /dev/null +++ b/tests/data/pytorch_source_dirs/train.py @@ -0,0 +1,30 @@ +# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. 
This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. +import alexa +import json + +MODEL = '/opt/ml/model/answer' + + +def model_fn(anything): + with open(MODEL) as model: + return json.load(model) + + +def predict_fn(input_object, model): + return input_object + model + + +if __name__ == '__main__': + with open(MODEL, 'w') as model: + json.dump(alexa.question('How many roads must a man walk down?'), model) diff --git a/tests/integ/test_source_dirs.py b/tests/integ/test_source_dirs.py new file mode 100644 index 0000000000..806a03742a --- /dev/null +++ b/tests/integ/test_source_dirs.py @@ -0,0 +1,41 @@ +# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. 
+from __future__ import absolute_import + +import os + +from sagemaker.pytorch.estimator import PyTorch +from tests.integ import DATA_DIR, PYTHON_VERSION + + +def test_source_dirs(tmpdir, sagemaker_local_session): + source_dir = os.path.join(DATA_DIR, 'pytorch_source_dirs') + lib = os.path.join(str(tmpdir), 'alexa.py') + + with open(lib, 'w') as f: + f.write('def question(to_anything): return 42') + + estimator = PyTorch(entry_point='train.py', role='SageMakerRole', source_dir=source_dir, dependencies=[lib], + py_version=PYTHON_VERSION, train_instance_count=1, train_instance_type='local', + sagemaker_session=sagemaker_local_session) + try: + + estimator.fit() + + predictor = estimator.deploy(initial_instance_count=1, instance_type='local') + + predict_response = predictor.predict([7]) + + assert predict_response == [49] + finally: + estimator.delete_endpoint() diff --git a/tests/unit/test_chainer.py b/tests/unit/test_chainer.py index 5a2b91f76c..1ef13a959d 100644 --- a/tests/unit/test_chainer.py +++ b/tests/unit/test_chainer.py @@ -18,7 +18,7 @@ import pytest import sys from distutils.util import strtobool -from mock import Mock +from mock import MagicMock, Mock from mock import patch @@ -282,6 +282,7 @@ def test_create_model_with_custom_image(sagemaker_session): assert model.image == custom_image +@patch('sagemaker.utils.create_tar_file', MagicMock()) @patch('time.strftime', return_value=TIMESTAMP) def test_chainer(strftime, sagemaker_session, chainer_version): chainer = Chainer(entry_point=SCRIPT_PATH, role=ROLE, sagemaker_session=sagemaker_session, @@ -321,6 +322,7 @@ def test_chainer(strftime, sagemaker_session, chainer_version): assert isinstance(predictor, ChainerPredictor) +@patch('sagemaker.utils.create_tar_file', MagicMock()) def test_model(sagemaker_session): model = ChainerModel("s3://some/data.tar.gz", role=ROLE, entry_point=SCRIPT_PATH, sagemaker_session=sagemaker_session) diff --git a/tests/unit/test_estimator.py b/tests/unit/test_estimator.py index 
7d8d8982f2..0622fa7b28 100644 --- a/tests/unit/test_estimator.py +++ b/tests/unit/test_estimator.py @@ -18,7 +18,7 @@ from time import sleep import pytest -from mock import Mock, patch +from mock import MagicMock, Mock, patch from sagemaker.estimator import Estimator, Framework, _TrainingJob from sagemaker.model import FrameworkModel @@ -127,6 +127,12 @@ def prepare_container_def(self, instance_type): return MODEL_CONTAINER_DEF +@pytest.fixture(autouse=True) +def mock_create_tar_file(): + with patch('sagemaker.utils.create_tar_file', MagicMock()) as create_tar_file: + yield create_tar_file + + @pytest.fixture() def sagemaker_session(): boto_mock = Mock(name='boto_session', region_name=REGION) diff --git a/tests/unit/test_fw_utils.py b/tests/unit/test_fw_utils.py index ad82771e81..0cd328e462 100644 --- a/tests/unit/test_fw_utils.py +++ b/tests/unit/test_fw_utils.py @@ -14,14 +14,12 @@ import inspect import os +import tarfile import pytest from mock import Mock, patch -from sagemaker.fw_utils import create_image_uri, framework_name_from_image, \ - framework_version_from_tag, \ - model_code_key_prefix -from sagemaker.fw_utils import tar_and_upload_dir, parse_s3_url, UploadedCode, validate_source_dir +from sagemaker import fw_utils from sagemaker.utils import name_from_image DATA_DIR = 'data_dir' @@ -44,62 +42,62 @@ def sagemaker_session(): def test_create_image_uri_cpu(): - image_uri = create_image_uri('mars-south-3', 'mlfw', 'ml.c4.large', '1.0rc', 'py2', '23') + image_uri = fw_utils.create_image_uri('mars-south-3', 'mlfw', 'ml.c4.large', '1.0rc', 'py2', '23') assert image_uri == '23.dkr.ecr.mars-south-3.amazonaws.com/sagemaker-mlfw:1.0rc-cpu-py2' - image_uri = create_image_uri('mars-south-3', 'mlfw', 'local', '1.0rc', 'py2', '23') + image_uri = fw_utils.create_image_uri('mars-south-3', 'mlfw', 'local', '1.0rc', 'py2', '23') assert image_uri == '23.dkr.ecr.mars-south-3.amazonaws.com/sagemaker-mlfw:1.0rc-cpu-py2' def test_create_image_uri_no_python(): - image_uri = 
create_image_uri('mars-south-3', 'mlfw', 'ml.c4.large', '1.0rc', account='23') + image_uri = fw_utils.create_image_uri('mars-south-3', 'mlfw', 'ml.c4.large', '1.0rc', account='23') assert image_uri == '23.dkr.ecr.mars-south-3.amazonaws.com/sagemaker-mlfw:1.0rc-cpu' def test_create_image_uri_bad_python(): with pytest.raises(ValueError): - create_image_uri('mars-south-3', 'mlfw', 'ml.c4.large', '1.0rc', 'py0') + fw_utils.create_image_uri('mars-south-3', 'mlfw', 'ml.c4.large', '1.0rc', 'py0') def test_create_image_uri_gpu(): - image_uri = create_image_uri('mars-south-3', 'mlfw', 'ml.p3.2xlarge', '1.0rc', 'py3', '23') + image_uri = fw_utils.create_image_uri('mars-south-3', 'mlfw', 'ml.p3.2xlarge', '1.0rc', 'py3', '23') assert image_uri == '23.dkr.ecr.mars-south-3.amazonaws.com/sagemaker-mlfw:1.0rc-gpu-py3' - image_uri = create_image_uri('mars-south-3', 'mlfw', 'local_gpu', '1.0rc', 'py3', '23') + image_uri = fw_utils.create_image_uri('mars-south-3', 'mlfw', 'local_gpu', '1.0rc', 'py3', '23') assert image_uri == '23.dkr.ecr.mars-south-3.amazonaws.com/sagemaker-mlfw:1.0rc-gpu-py3' def test_create_image_uri_default_account(): - image_uri = create_image_uri('mars-south-3', 'mlfw', 'ml.p3.2xlarge', '1.0rc', 'py3') + image_uri = fw_utils.create_image_uri('mars-south-3', 'mlfw', 'ml.p3.2xlarge', '1.0rc', 'py3') assert image_uri == '520713654638.dkr.ecr.mars-south-3.amazonaws.com/sagemaker-mlfw:1.0rc-gpu-py3' def test_create_image_uri_gov_cloud(): - image_uri = create_image_uri('us-gov-west-1', 'mlfw', 'ml.p3.2xlarge', '1.0rc', 'py3') + image_uri = fw_utils.create_image_uri('us-gov-west-1', 'mlfw', 'ml.p3.2xlarge', '1.0rc', 'py3') assert image_uri == '246785580436.dkr.ecr.us-gov-west-1.amazonaws.com/sagemaker-mlfw:1.0rc-gpu-py3' def test_invalid_instance_type(): # instance type is missing 'ml.' 
prefix with pytest.raises(ValueError): - create_image_uri('mars-south-3', 'mlfw', 'p3.2xlarge', '1.0.0', 'py3') + fw_utils.create_image_uri('mars-south-3', 'mlfw', 'p3.2xlarge', '1.0.0', 'py3') def test_optimized_family(): - image_uri = create_image_uri('mars-south-3', 'mlfw', 'ml.p3.2xlarge', '1.0.0', 'py3', - optimized_families=['c5', 'p3']) + image_uri = fw_utils.create_image_uri('mars-south-3', 'mlfw', 'ml.p3.2xlarge', '1.0.0', 'py3', + optimized_families=['c5', 'p3']) assert image_uri == '520713654638.dkr.ecr.mars-south-3.amazonaws.com/sagemaker-mlfw:1.0.0-p3-py3' def test_unoptimized_cpu_family(): - image_uri = create_image_uri('mars-south-3', 'mlfw', 'ml.m4.xlarge', '1.0.0', 'py3', - optimized_families=['c5', 'p3']) + image_uri = fw_utils.create_image_uri('mars-south-3', 'mlfw', 'ml.m4.xlarge', '1.0.0', 'py3', + optimized_families=['c5', 'p3']) assert image_uri == '520713654638.dkr.ecr.mars-south-3.amazonaws.com/sagemaker-mlfw:1.0.0-cpu-py3' def test_unoptimized_gpu_family(): - image_uri = create_image_uri('mars-south-3', 'mlfw', 'ml.p2.xlarge', '1.0.0', 'py3', - optimized_families=['c5', 'p3']) + image_uri = fw_utils.create_image_uri('mars-south-3', 'mlfw', 'ml.p2.xlarge', '1.0.0', 'py3', + optimized_families=['c5', 'p3']) assert image_uri == '520713654638.dkr.ecr.mars-south-3.amazonaws.com/sagemaker-mlfw:1.0.0-gpu-py3' @@ -108,29 +106,29 @@ def test_tar_and_upload_dir_s3(sagemaker_session): s3_key_prefix = 'something/source' script = 'mnist.py' directory = 's3://m' - result = tar_and_upload_dir(sagemaker_session, bucket, s3_key_prefix, script, directory) - assert result == UploadedCode('s3://m', 'mnist.py') + result = fw_utils.tar_and_upload_dir(sagemaker_session, bucket, s3_key_prefix, script, directory) + assert result == fw_utils.UploadedCode('s3://m', 'mnist.py') def test_validate_source_dir_does_not_exits(sagemaker_session): script = 'mnist.py' directory = ' !@#$%^&*()path probably in not there.!@#$%^&*()' with pytest.raises(ValueError): - 
validate_source_dir(script, directory) + fw_utils.validate_source_dir(script, directory) def test_validate_source_dir_is_not_directory(sagemaker_session): script = 'mnist.py' directory = inspect.getfile(inspect.currentframe()) with pytest.raises(ValueError): - validate_source_dir(script, directory) + fw_utils.validate_source_dir(script, directory) def test_validate_source_dir_file_not_in_dir(): script = ' !@#$%^&*() .myscript. !@#$%^&*() ' directory = '.' with pytest.raises(ValueError): - validate_source_dir(script, directory) + fw_utils.validate_source_dir(script, directory) def test_tar_and_upload_dir_not_s3(sagemaker_session): @@ -138,31 +136,160 @@ def test_tar_and_upload_dir_not_s3(sagemaker_session): s3_key_prefix = 'something/source' script = os.path.basename(__file__) directory = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) - result = tar_and_upload_dir(sagemaker_session, bucket, s3_key_prefix, script, directory) - assert result == UploadedCode('s3://{}/{}/sourcedir.tar.gz'.format(bucket, s3_key_prefix), - script) + result = fw_utils.tar_and_upload_dir(sagemaker_session, bucket, s3_key_prefix, script, directory) + assert result == fw_utils.UploadedCode('s3://{}/{}/sourcedir.tar.gz'.format(bucket, s3_key_prefix), + script) + + +def file_tree(tmpdir, files=None, folders=None): + files = files or [] + folders = folders or [] + for file in files: + tmpdir.join(file).ensure(file=True) + + for folder in folders: + tmpdir.join(folder).ensure(dir=True) + + return str(tmpdir) + + +def test_tar_and_upload_dir_no_directory(sagemaker_session, tmpdir): + source_dir = file_tree(tmpdir, ['train.py']) + entrypoint = os.path.join(source_dir, 'train.py') + + with patch('shutil.rmtree'): + result = fw_utils.tar_and_upload_dir(sagemaker_session, 'bucket', 'prefix', entrypoint, None) + + assert result == fw_utils.UploadedCode(s3_prefix='s3://bucket/prefix/sourcedir.tar.gz', + script_name='train.py') + + assert {'/train.py'} == 
list_source_dir_files(sagemaker_session, tmpdir) + + +def test_tar_and_upload_dir_with_directory(sagemaker_session, tmpdir): + file_tree(tmpdir, ['src-dir/train.py']) + source_dir = os.path.join(str(tmpdir), 'src-dir') + + with patch('shutil.rmtree'): + result = fw_utils.tar_and_upload_dir(sagemaker_session, 'bucket', 'prefix', 'train.py', source_dir) + + assert result == fw_utils.UploadedCode(s3_prefix='s3://bucket/prefix/sourcedir.tar.gz', + script_name='train.py') + + assert {'/train.py'} == list_source_dir_files(sagemaker_session, tmpdir) + + +def test_tar_and_upload_dir_with_subdirectory(sagemaker_session, tmpdir): + file_tree(tmpdir, ['src-dir/sub/train.py']) + source_dir = os.path.join(str(tmpdir), 'src-dir') + + with patch('shutil.rmtree'): + result = fw_utils.tar_and_upload_dir(sagemaker_session, 'bucket', 'prefix', 'train.py', source_dir) + + assert result == fw_utils.UploadedCode(s3_prefix='s3://bucket/prefix/sourcedir.tar.gz', + script_name='train.py') + + assert {'/sub/train.py'} == list_source_dir_files(sagemaker_session, tmpdir) + + +def test_tar_and_upload_dir_with_directory_and_files(sagemaker_session, tmpdir): + file_tree(tmpdir, ['src-dir/train.py', 'src-dir/laucher', 'src-dir/module/__init__.py']) + source_dir = os.path.join(str(tmpdir), 'src-dir') + + with patch('shutil.rmtree'): + result = fw_utils.tar_and_upload_dir(sagemaker_session, 'bucket', 'prefix', 'train.py', source_dir) + + assert result == fw_utils.UploadedCode(s3_prefix='s3://bucket/prefix/sourcedir.tar.gz', + script_name='train.py') + + assert {'/laucher', '/module/__init__.py', '/train.py'} == list_source_dir_files(sagemaker_session, tmpdir) + + +def test_tar_and_upload_dir_with_directories_and_files(sagemaker_session, tmpdir): + file_tree(tmpdir, ['src-dir/a/b', 'src-dir/a/b2', 'src-dir/x/y', 'src-dir/x/y2', 'src-dir/z']) + source_dir = os.path.join(str(tmpdir), 'src-dir') + + with patch('shutil.rmtree'): + result = fw_utils.tar_and_upload_dir(sagemaker_session, 'bucket', 
'prefix', 'a/b', source_dir) + + assert result == fw_utils.UploadedCode(s3_prefix='s3://bucket/prefix/sourcedir.tar.gz', + script_name='a/b') + + assert {'/a/b', '/a/b2', '/x/y', '/x/y2', '/z'} == list_source_dir_files(sagemaker_session, tmpdir) + + +def test_tar_and_upload_dir_with_many_folders(sagemaker_session, tmpdir): + file_tree(tmpdir, ['src-dir/a/b', 'src-dir/a/b2', 'common/x/y', 'common/x/y2', 't/y/z']) + source_dir = os.path.join(str(tmpdir), 'src-dir') + dependencies = [os.path.join(str(tmpdir), 'common'), os.path.join(str(tmpdir), 't', 'y', 'z')] + + with patch('shutil.rmtree'): + result = fw_utils.tar_and_upload_dir(sagemaker_session, 'bucket', 'prefix', + 'model.py', source_dir, dependencies) + + assert result == fw_utils.UploadedCode(s3_prefix='s3://bucket/prefix/sourcedir.tar.gz', + script_name='model.py') + + assert {'/a/b', '/a/b2', '/common/x/y', '/common/x/y2', '/z'} == list_source_dir_files(sagemaker_session, tmpdir) + + +def test_tar_and_upload_dir_with_subfolders(sagemaker_session, tmpdir): + file_tree(tmpdir, ['a/b/c', 'a/b/c2']) + root = file_tree(tmpdir, ['x/y/z', 'x/y/z2']) + + with patch('shutil.rmtree'): + result = fw_utils.tar_and_upload_dir(sagemaker_session, 'bucket', 'prefix', 'b/c', + os.path.join(root, 'a'), [os.path.join(root, 'x')]) + + assert result == fw_utils.UploadedCode(s3_prefix='s3://bucket/prefix/sourcedir.tar.gz', + script_name='b/c') + + assert {'/b/c', '/b/c2', '/x/y/z', '/x/y/z2'} == list_source_dir_files(sagemaker_session, tmpdir) + + +def list_source_dir_files(sagemaker_session, tmpdir): + source_dir_tar = sagemaker_session.resource('s3').Object().upload_file.call_args[0][0] + + source_dir_files = list_tar_files('/opt/ml/code/', source_dir_tar, tmpdir) + return source_dir_files + + +def list_tar_files(folder, tar_ball, tmpdir): + startpath = str(tmpdir.ensure(folder, dir=True)) + + with tarfile.open(name=tar_ball, mode='r:gz') as t: + t.extractall(path=startpath) + + def walk(): + for root, dirs, files in
os.walk(startpath): + path = root.replace(startpath, '') + for f in files: + yield '%s/%s' % (path, f) + + result = set(walk()) + return result if result else set() def test_framework_name_from_image_mxnet(): image_name = '123.dkr.ecr.us-west-2.amazonaws.com/sagemaker-mxnet:1.1-gpu-py3' - assert ('mxnet', 'py3', '1.1-gpu-py3') == framework_name_from_image(image_name) + assert ('mxnet', 'py3', '1.1-gpu-py3') == fw_utils.framework_name_from_image(image_name) def test_framework_name_from_image_tf(): image_name = '123.dkr.ecr.us-west-2.amazonaws.com/sagemaker-tensorflow:1.6-cpu-py2' - assert ('tensorflow', 'py2', '1.6-cpu-py2') == framework_name_from_image(image_name) + assert ('tensorflow', 'py2', '1.6-cpu-py2') == fw_utils.framework_name_from_image(image_name) def test_legacy_name_from_framework_image(): image_name = '123.dkr.ecr.us-west-2.amazonaws.com/sagemaker-mxnet-py3-gpu:2.5.6-gpu-py2' - framework, py_ver, tag = framework_name_from_image(image_name) + framework, py_ver, tag = fw_utils.framework_name_from_image(image_name) assert framework == 'mxnet' assert py_ver == 'py3' assert tag == '2.5.6-gpu-py2' def test_legacy_name_from_wrong_framework(): - framework, py_ver, tag = framework_name_from_image( + framework, py_ver, tag = fw_utils.framework_name_from_image( '123.dkr.ecr.us-west-2.amazonaws.com/sagemaker-myown-py2-gpu:1') assert framework is None assert py_ver is None @@ -170,7 +297,7 @@ def test_legacy_name_from_wrong_framework(): def test_legacy_name_from_wrong_python(): - framework, py_ver, tag = framework_name_from_image( + framework, py_ver, tag = fw_utils.framework_name_from_image( '123.dkr.ecr.us-west-2.amazonaws.com/sagemaker-myown-py4-gpu:1') assert framework is None assert py_ver is None @@ -178,7 +305,7 @@ def test_legacy_name_from_wrong_python(): def test_legacy_name_from_wrong_device(): - framework, py_ver, tag = framework_name_from_image( + framework, py_ver, tag = fw_utils.framework_name_from_image(
'123.dkr.ecr.us-west-2.amazonaws.com/sagemaker-myown-py4-gpu:1') assert framework is None assert py_ver is None @@ -187,63 +314,63 @@ def test_legacy_name_from_wrong_device(): def test_legacy_name_from_image_any_tag(): image_name = '123.dkr.ecr.us-west-2.amazonaws.com/sagemaker-tensorflow-py2-cpu:any-tag' - framework, py_ver, tag = framework_name_from_image(image_name) + framework, py_ver, tag = fw_utils.framework_name_from_image(image_name) assert framework == 'tensorflow' assert py_ver == 'py2' assert tag == 'any-tag' def test_framework_version_from_tag(): - version = framework_version_from_tag('1.5rc-keras-gpu-py2') + version = fw_utils.framework_version_from_tag('1.5rc-keras-gpu-py2') assert version == '1.5rc-keras' def test_framework_version_from_tag_other(): - version = framework_version_from_tag('weird-tag-py2') + version = fw_utils.framework_version_from_tag('weird-tag-py2') assert version is None def test_parse_s3_url(): - bucket, key_prefix = parse_s3_url('s3://bucket/code_location') + bucket, key_prefix = fw_utils.parse_s3_url('s3://bucket/code_location') assert 'bucket' == bucket assert 'code_location' == key_prefix def test_parse_s3_url_fail(): with pytest.raises(ValueError) as error: - parse_s3_url('t3://code_location') + fw_utils.parse_s3_url('t3://code_location') assert 'Expecting \'s3\' scheme' in str(error) def test_model_code_key_prefix_with_all_values_present(): - key_prefix = model_code_key_prefix('prefix', 'model_name', 'image_name') + key_prefix = fw_utils.model_code_key_prefix('prefix', 'model_name', 'image_name') assert key_prefix == 'prefix/model_name' def test_model_code_key_prefix_with_no_prefix_and_all_other_values_present(): - key_prefix = model_code_key_prefix(None, 'model_name', 'image_name') + key_prefix = fw_utils.model_code_key_prefix(None, 'model_name', 'image_name') assert key_prefix == 'model_name' @patch('time.strftime', return_value=TIMESTAMP) def test_model_code_key_prefix_with_only_image_present(time): - key_prefix = 
model_code_key_prefix(None, None, 'image_name') + key_prefix = fw_utils.model_code_key_prefix(None, None, 'image_name') assert key_prefix == name_from_image('image_name') @patch('time.strftime', return_value=TIMESTAMP) def test_model_code_key_prefix_and_image_present(time): - key_prefix = model_code_key_prefix('prefix', None, 'image_name') + key_prefix = fw_utils.model_code_key_prefix('prefix', None, 'image_name') assert key_prefix == 'prefix/' + name_from_image('image_name') def test_model_code_key_prefix_with_prefix_present_and_others_none_fail(): with pytest.raises(TypeError) as error: - model_code_key_prefix('prefix', None, None) + fw_utils.model_code_key_prefix('prefix', None, None) assert 'expected string' in str(error) def test_model_code_key_prefix_with_all_none_fail(): with pytest.raises(TypeError) as error: - model_code_key_prefix(None, None, None) + fw_utils.model_code_key_prefix(None, None, None) assert 'expected string' in str(error) diff --git a/tests/unit/test_model.py b/tests/unit/test_model.py index 1d3da9dc7b..395c8e5646 100644 --- a/tests/unit/test_model.py +++ b/tests/unit/test_model.py @@ -54,9 +54,11 @@ def sagemaker_session(): return sms -@patch('tarfile.open') +@patch('shutil.rmtree', MagicMock()) +@patch('tarfile.open', MagicMock()) +@patch('os.listdir', MagicMock(return_value=['blah.py'])) @patch('time.strftime', return_value=TIMESTAMP) -def test_prepare_container_def(tfopen, time, sagemaker_session): +def test_prepare_container_def(time, sagemaker_session): model = DummyFrameworkModel(sagemaker_session) assert model.prepare_container_def(INSTANCE_TYPE) == { 'Environment': {'SAGEMAKER_PROGRAM': 'blah.py', @@ -68,13 +70,14 @@ def test_prepare_container_def(tfopen, time, sagemaker_session): 'ModelDataUrl': 's3://bucket/model.tar.gz'} -@patch('tarfile.open') -@patch('os.path.exists', return_value=True) -@patch('os.path.isdir', return_value=True) -@patch('os.listdir', return_value=['blah.py']) -@patch('time.strftime', return_value=TIMESTAMP) 
-def test_create_no_defaults(tfopen, exists, isdir, listdir, time, sagemaker_session): - model = DummyFrameworkModel(sagemaker_session, source_dir="sd", env={"a": "a"}, name="name", +@patch('shutil.rmtree', MagicMock()) +@patch('tarfile.open', MagicMock()) +@patch('os.path.exists', MagicMock(return_value=True)) +@patch('os.path.isdir', MagicMock(return_value=True)) +@patch('os.listdir', MagicMock(return_value=['blah.py'])) +@patch('time.strftime', MagicMock(return_value=TIMESTAMP)) +def test_create_no_defaults(sagemaker_session, tmpdir): + model = DummyFrameworkModel(sagemaker_session, source_dir='sd', env={"a": "a"}, name="name", enable_cloudwatch_metrics=True, container_log_level=55, code_location="s3://cb/cp") @@ -89,10 +92,10 @@ def test_create_no_defaults(tfopen, exists, isdir, listdir, time, sagemaker_sess 'ModelDataUrl': 's3://bucket/model.tar.gz'} -@patch('tarfile.open') -@patch('time.strftime', return_value=TIMESTAMP) -def test_deploy(tfo, time, sagemaker_session): - model = DummyFrameworkModel(sagemaker_session) +@patch('sagemaker.fw_utils.tar_and_upload_dir', MagicMock()) +@patch('time.strftime', MagicMock(return_value=TIMESTAMP)) +def test_deploy(sagemaker_session, tmpdir): + model = DummyFrameworkModel(sagemaker_session, source_dir=str(tmpdir)) model.deploy(instance_type=INSTANCE_TYPE, initial_instance_count=1) sagemaker_session.endpoint_from_production_variants.assert_called_with( 'mi-2017-10-10-14-14-15', @@ -104,10 +107,10 @@ def test_deploy(tfo, time, sagemaker_session): None) -@patch('tarfile.open') -@patch('time.strftime', return_value=TIMESTAMP) -def test_deploy_endpoint_name(tfo, time, sagemaker_session): - model = DummyFrameworkModel(sagemaker_session) +@patch('sagemaker.fw_utils.tar_and_upload_dir', MagicMock()) +@patch('time.strftime', MagicMock(return_value=TIMESTAMP)) +def test_deploy_endpoint_name(sagemaker_session, tmpdir): + model = DummyFrameworkModel(sagemaker_session, source_dir=str(tmpdir)) model.deploy(endpoint_name='blah', 
instance_type=INSTANCE_TYPE, initial_instance_count=55) sagemaker_session.endpoint_from_production_variants.assert_called_with( 'blah', @@ -119,10 +122,10 @@ def test_deploy_endpoint_name(tfo, time, sagemaker_session): None) -@patch('tarfile.open') -@patch('time.strftime', return_value=TIMESTAMP) -def test_deploy_tags(tfo, time, sagemaker_session): - model = DummyFrameworkModel(sagemaker_session) +@patch('sagemaker.fw_utils.tar_and_upload_dir', MagicMock()) +@patch('time.strftime', MagicMock(return_value=TIMESTAMP)) +def test_deploy_tags(sagemaker_session, tmpdir): + model = DummyFrameworkModel(sagemaker_session, source_dir=str(tmpdir)) tags = [{'ModelName': 'TestModel'}] model.deploy(instance_type=INSTANCE_TYPE, initial_instance_count=1, tags=tags) sagemaker_session.endpoint_from_production_variants.assert_called_with( @@ -135,17 +138,16 @@ def test_deploy_tags(tfo, time, sagemaker_session): tags) -@patch('sagemaker.model.Session') -@patch('sagemaker.model.LocalSession') -@patch('tarfile.open', MagicMock()) -def test_deploy_creates_correct_session(local_session, session): - +@patch('sagemaker.session.Session') +@patch('sagemaker.local.LocalSession') +@patch('sagemaker.fw_utils.tar_and_upload_dir', MagicMock()) +def test_deploy_creates_correct_session(local_session, session, tmpdir): # We expect a LocalSession when deploying to instance_type = 'local' - model = DummyFrameworkModel(sagemaker_session=None) + model = DummyFrameworkModel(sagemaker_session=None, source_dir=str(tmpdir)) model.deploy(endpoint_name='blah', instance_type='local', initial_instance_count=1) assert model.sagemaker_session == local_session.return_value # We expect a real Session when deploying to instance_type != local/local_gpu - model = DummyFrameworkModel(sagemaker_session=None) + model = DummyFrameworkModel(sagemaker_session=None, source_dir=str(tmpdir)) model.deploy(endpoint_name='remote_endpoint', instance_type='ml.m4.4xlarge', initial_instance_count=2) assert model.sagemaker_session == 
session.return_value diff --git a/tests/unit/test_mxnet.py b/tests/unit/test_mxnet.py index b69c40f8be..48252d228c 100644 --- a/tests/unit/test_mxnet.py +++ b/tests/unit/test_mxnet.py @@ -17,7 +17,7 @@ import json import os import pytest -from mock import Mock +from mock import MagicMock, Mock from mock import patch from sagemaker.mxnet import defaults @@ -101,6 +101,7 @@ def _create_train_job(version): } +@patch('sagemaker.utils.create_tar_file', MagicMock()) def test_create_model(sagemaker_session, mxnet_version): container_log_level = '"logging.INFO"' source_dir = 's3://mybucket/source' @@ -168,6 +169,7 @@ def test_create_model_with_custom_image(sagemaker_session): assert model.source_dir == source_dir +@patch('sagemaker.utils.create_tar_file', MagicMock()) @patch('time.strftime', return_value=TIMESTAMP) def test_mxnet(strftime, sagemaker_session, mxnet_version): mx = MXNet(entry_point=SCRIPT_PATH, role=ROLE, sagemaker_session=sagemaker_session, @@ -207,6 +209,7 @@ def test_mxnet(strftime, sagemaker_session, mxnet_version): assert isinstance(predictor, MXNetPredictor) +@patch('sagemaker.utils.create_tar_file', MagicMock()) def test_model(sagemaker_session): model = MXNetModel("s3://some/data.tar.gz", role=ROLE, entry_point=SCRIPT_PATH, sagemaker_session=sagemaker_session) diff --git a/tests/unit/test_pytorch.py b/tests/unit/test_pytorch.py index e9705b9822..ca986b083f 100644 --- a/tests/unit/test_pytorch.py +++ b/tests/unit/test_pytorch.py @@ -17,7 +17,7 @@ import os import pytest import sys -from mock import Mock +from mock import MagicMock, Mock from mock import patch from sagemaker.pytorch import defaults @@ -184,6 +184,7 @@ def test_create_model_with_custom_image(sagemaker_session): assert model.source_dir == source_dir +@patch('sagemaker.utils.create_tar_file', MagicMock()) @patch('time.strftime', return_value=TIMESTAMP) def test_pytorch(strftime, sagemaker_session, pytorch_version): pytorch = PyTorch(entry_point=SCRIPT_PATH, role=ROLE, 
sagemaker_session=sagemaker_session, @@ -223,6 +224,7 @@ def test_pytorch(strftime, sagemaker_session, pytorch_version): assert isinstance(predictor, PyTorchPredictor) +@patch('sagemaker.utils.create_tar_file', MagicMock()) def test_model(sagemaker_session): model = PyTorchModel("s3://some/data.tar.gz", role=ROLE, entry_point=SCRIPT_PATH, sagemaker_session=sagemaker_session) diff --git a/tests/unit/test_tf_estimator.py b/tests/unit/test_tf_estimator.py index 8dc27ca101..ef4d744279 100644 --- a/tests/unit/test_tf_estimator.py +++ b/tests/unit/test_tf_estimator.py @@ -17,9 +17,9 @@ import os import pytest -from mock import patch, Mock +from mock import patch, Mock, MagicMock -from sagemaker.fw_utils import create_image_uri, UploadedCode +from sagemaker.fw_utils import create_image_uri from sagemaker.model import MODEL_SERVER_WORKERS_PARAM_NAME from sagemaker.session import s3_input from sagemaker.tensorflow import defaults, TensorFlow, TensorFlowModel, TensorFlowPredictor @@ -162,6 +162,7 @@ def test_tf_support_gpu_instances(sagemaker_session, tf_version): assert tf.train_image() == _get_full_gpu_image_uri(tf_version) +@patch('sagemaker.utils.create_tar_file', MagicMock()) def test_tf_deploy_model_server_workers(sagemaker_session): tf = _build_tf(sagemaker_session) tf.fit(inputs=s3_input('s3://mybucket/train')) @@ -172,6 +173,7 @@ def test_tf_deploy_model_server_workers(sagemaker_session): MODEL_SERVER_WORKERS_PARAM_NAME.upper()] +@patch('sagemaker.utils.create_tar_file', MagicMock()) def test_tf_deploy_model_server_workers_unset(sagemaker_session): tf = _build_tf(sagemaker_session) tf.fit(inputs=s3_input('s3://mybucket/train')) @@ -259,20 +261,16 @@ def test_create_model_with_custom_image(sagemaker_session): assert model.image == custom_image -@patch('time.strftime', return_value=TIMESTAMP) -@patch('time.time', return_value=TIME) -@patch('sagemaker.estimator.tar_and_upload_dir') -@patch('sagemaker.model.tar_and_upload_dir') -def test_tf(m_tar, e_tar, time, strftime, 
sagemaker_session, tf_version): +@patch('sagemaker.utils.create_tar_file', MagicMock()) +@patch('time.strftime', MagicMock(return_value=TIMESTAMP)) +@patch('time.time', MagicMock(return_value=TIME)) +def test_tf(sagemaker_session, tf_version): tf = TensorFlow(entry_point=SCRIPT_FILE, role=ROLE, sagemaker_session=sagemaker_session, training_steps=1000, evaluation_steps=10, train_instance_count=INSTANCE_COUNT, train_instance_type=INSTANCE_TYPE, framework_version=tf_version, requirements_file=REQUIREMENTS_FILE, source_dir=DATA_DIR) inputs = 's3://mybucket/train' - s3_prefix = 's3://{}/{}/source/sourcedir.tar.gz'.format(BUCKET_NAME, JOB_NAME) - e_tar.return_value = UploadedCode(s3_prefix=s3_prefix, script_name=SCRIPT_FILE) - s3_prefix = 's3://{}/{}/sourcedir.tar.gz'.format(BUCKET_NAME, JOB_NAME) - m_tar.return_value = UploadedCode(s3_prefix=s3_prefix, script_name=SCRIPT_FILE) + tf.fit(inputs=inputs) call_names = [c[0] for c in sagemaker_session.method_calls] @@ -288,7 +286,8 @@ def test_tf(m_tar, e_tar, time, strftime, sagemaker_session, tf_version): environment = { 'Environment': { - 'SAGEMAKER_SUBMIT_DIRECTORY': 's3://{}/{}/sourcedir.tar.gz'.format(BUCKET_NAME, JOB_NAME), + 'SAGEMAKER_SUBMIT_DIRECTORY': + 's3://mybucket/sagemaker-tensorflow-2017-11-06-14:14:15.673/source/sourcedir.tar.gz', 'SAGEMAKER_PROGRAM': 'dummy_script.py', 'SAGEMAKER_REQUIREMENTS': 'dummy_requirements.txt', 'SAGEMAKER_ENABLE_CLOUDWATCH_METRICS': 'false', 'SAGEMAKER_REGION': 'us-west-2', 'SAGEMAKER_CONTAINER_LOG_LEVEL': '20' @@ -318,6 +317,7 @@ def test_run_tensorboard_locally_without_tensorboard_binary(time, strftime, pope 'following command: \n pip install tensorboard' +@patch('sagemaker.utils.create_tar_file', MagicMock()) def test_model(sagemaker_session, tf_version): model = TensorFlowModel("s3://some/data.tar.gz", role=ROLE, entry_point=SCRIPT_PATH, sagemaker_session=sagemaker_session) @@ -340,6 +340,7 @@ def test_run_tensorboard_locally_without_awscli_binary(time, strftime, popen, ca 
'following command: \n pip install awscli' +@patch('sagemaker.utils.create_tar_file', MagicMock()) @patch('sagemaker.tensorflow.estimator.Tensorboard._sync_directories') @patch('tempfile.mkdtemp', return_value='/my/temp/folder') @patch('shutil.rmtree') @@ -362,6 +363,7 @@ def test_run_tensorboard_locally(sleep, time, strftime, popen, call, access, rmt stdout=-1) +@patch('sagemaker.utils.create_tar_file', MagicMock()) @patch('sagemaker.tensorflow.estimator.Tensorboard._sync_directories') @patch('tempfile.mkdtemp', return_value='/my/temp/folder') @patch('shutil.rmtree') @@ -388,6 +390,7 @@ def test_run_tensorboard_locally_port_in_use(sleep, time, strftime, popen, call, stderr=-1, stdout=-1) +@patch('sagemaker.utils.create_tar_file', MagicMock()) def test_tf_checkpoint_not_set(sagemaker_session): job_name = "sagemaker-tensorflow-py2-gpu-2017-10-24-14-12-09" tf = _build_tf(sagemaker_session, checkpoint_path=None, base_job_name=job_name, @@ -398,6 +401,7 @@ def test_tf_checkpoint_not_set(sagemaker_session): assert tf.hyperparameters()['checkpoint_path'] == expected_result +@patch('sagemaker.utils.create_tar_file', MagicMock()) def test_tf_training_and_evaluation_steps_not_set(sagemaker_session): job_name = "sagemaker-tensorflow-py2-gpu-2017-10-24-14-12-09" output_path = "s3://{}/output/{}/".format(sagemaker_session.default_bucket(), job_name) @@ -408,6 +412,7 @@ def test_tf_training_and_evaluation_steps_not_set(sagemaker_session): assert tf.hyperparameters()['evaluation_steps'] == 'null' +@patch('sagemaker.utils.create_tar_file', MagicMock()) def test_tf_training_and_evaluation_steps(sagemaker_session): job_name = "sagemaker-tensorflow-py2-gpu-2017-10-24-14-12-09" output_path = "s3://{}/output/{}/".format(sagemaker_session.default_bucket(), job_name) @@ -418,11 +423,13 @@ def test_tf_training_and_evaluation_steps(sagemaker_session): assert tf.hyperparameters()['evaluation_steps'] == '456' +@patch('sagemaker.utils.create_tar_file', MagicMock()) def 
test_tf_checkpoint_set(sagemaker_session): tf = _build_tf(sagemaker_session, checkpoint_path='s3://my_checkpoint_bucket') assert tf.hyperparameters()['checkpoint_path'] == json.dumps("s3://my_checkpoint_bucket") +@patch('sagemaker.utils.create_tar_file', MagicMock()) def test_train_image_default(sagemaker_session): tf = TensorFlow(entry_point=SCRIPT_PATH, role=ROLE, @@ -433,6 +440,7 @@ def test_train_image_default(sagemaker_session): assert _get_full_cpu_image_uri(defaults.TF_VERSION) in tf.train_image() +@patch('sagemaker.utils.create_tar_file', MagicMock()) def test_attach(sagemaker_session, tf_version): training_image = '1.dkr.ecr.us-west-2.amazonaws.com/sagemaker-tensorflow-py2-cpu:{}-cpu-py2'.format(tf_version) rjd = { @@ -483,6 +491,7 @@ def test_attach(sagemaker_session, tf_version): assert estimator.checkpoint_path == 's3://other/1508872349' +@patch('sagemaker.utils.create_tar_file', MagicMock()) def test_attach_new_repo_name(sagemaker_session, tf_version): training_image = '520713654638.dkr.ecr.us-west-2.amazonaws.com/sagemaker-tensorflow:{}-cpu-py2'.format(tf_version) rjd = { @@ -531,6 +540,7 @@ def test_attach_new_repo_name(sagemaker_session, tf_version): assert estimator.train_image() == training_image +@patch('sagemaker.utils.create_tar_file', MagicMock()) def test_attach_old_container(sagemaker_session): training_image = '1.dkr.ecr.us-west-2.amazonaws.com/sagemaker-tensorflow-py2-cpu:1.0' rjd = { @@ -707,18 +717,16 @@ def test_script_mode_create_model(create_tfs_model, sagemaker_session): create_tfs_model.assert_called_once() +@patch('sagemaker.utils.create_tar_file', MagicMock()) @patch('sagemaker.tensorflow.estimator.Tensorboard._sync_directories') @patch('sagemaker.tensorflow.estimator.Tensorboard.start') -@patch('tempfile.mkdtemp', return_value='/my/temp/folder') -@patch('shutil.rmtree') @patch('os.access', return_value=True) @patch('subprocess.call') @patch('subprocess.Popen') @patch('time.strftime', return_value=TIMESTAMP) @patch('time.time', 
return_value=TIME) @patch('time.sleep') -def test_script_mode_tensorboard(sleep, time, strftime, popen, call, access, rmtree, mkdtemp, - start, sync, sagemaker_session): +def test_script_mode_tensorboard(sleep, time, strftime, popen, call, access, start, sync, sagemaker_session): tf = TensorFlow(entry_point=SCRIPT_PATH, role=ROLE, sagemaker_session=sagemaker_session, train_instance_count=INSTANCE_COUNT, train_instance_type=INSTANCE_TYPE, framework_version='some_version', script_mode=True) @@ -729,18 +737,13 @@ def test_script_mode_tensorboard(sleep, time, strftime, popen, call, access, rmt @patch('time.strftime', return_value=TIMESTAMP) @patch('time.time', return_value=TIME) -@patch('sagemaker.estimator.tar_and_upload_dir') -@patch('sagemaker.model.tar_and_upload_dir') -def test_tf_script_mode(m_tar, e_tar, time, strftime, sagemaker_session): +@patch('sagemaker.utils.create_tar_file', MagicMock()) +def test_tf_script_mode(time, strftime, sagemaker_session): tf = TensorFlow(entry_point=SCRIPT_FILE, role=ROLE, sagemaker_session=sagemaker_session, py_version='py3', train_instance_type=INSTANCE_TYPE, train_instance_count=1, framework_version='1.11', source_dir=DATA_DIR) inputs = 's3://mybucket/train' - s3_prefix = 's3://{}/{}/source/sourcedir.tar.gz'.format(BUCKET_NAME, SM_JOB_NAME) - e_tar.return_value = UploadedCode(s3_prefix=s3_prefix, script_name=SCRIPT_FILE) - s3_prefix = 's3://{}/{}/sourcedir.tar.gz'.format(BUCKET_NAME, SM_JOB_NAME) - m_tar.return_value = UploadedCode(s3_prefix=s3_prefix, script_name=SCRIPT_FILE) tf.fit(inputs=inputs) call_names = [c[0] for c in sagemaker_session.method_calls] @@ -755,18 +758,13 @@ def test_tf_script_mode(m_tar, e_tar, time, strftime, sagemaker_session): @patch('time.strftime', return_value=TIMESTAMP) @patch('time.time', return_value=TIME) -@patch('sagemaker.estimator.tar_and_upload_dir') -@patch('sagemaker.model.tar_and_upload_dir') -def test_tf_script_mode_ps(m_tar, e_tar, time, strftime, sagemaker_session): 
+@patch('sagemaker.utils.create_tar_file', MagicMock()) +def test_tf_script_mode_ps(time, strftime, sagemaker_session): tf = TensorFlow(entry_point=SCRIPT_FILE, role=ROLE, sagemaker_session=sagemaker_session, py_version='py3', train_instance_type=INSTANCE_TYPE, train_instance_count=1, framework_version='1.11', source_dir=DATA_DIR, distributions=DISTRIBUTION_ENABLED) inputs = 's3://mybucket/train' - s3_prefix = 's3://{}/{}/source/sourcedir.tar.gz'.format(BUCKET_NAME, SM_JOB_NAME) - e_tar.return_value = UploadedCode(s3_prefix=s3_prefix, script_name=SCRIPT_FILE) - s3_prefix = 's3://{}/{}/sourcedir.tar.gz'.format(BUCKET_NAME, SM_JOB_NAME) - m_tar.return_value = UploadedCode(s3_prefix=s3_prefix, script_name=SCRIPT_FILE) tf.fit(inputs=inputs) call_names = [c[0] for c in sagemaker_session.method_calls] diff --git a/tests/unit/test_transformer.py b/tests/unit/test_transformer.py index 1342cf5817..dbd84b8528 100644 --- a/tests/unit/test_transformer.py +++ b/tests/unit/test_transformer.py @@ -13,7 +13,7 @@ from __future__ import absolute_import import pytest -from mock import Mock, patch +from mock import MagicMock, Mock, patch from sagemaker.transformer import Transformer, _TransformJob @@ -40,6 +40,12 @@ } +@pytest.fixture(autouse=True) +def mock_create_tar_file(): + with patch('sagemaker.utils.create_tar_file', MagicMock()) as create_tar_file: + yield create_tar_file + + @pytest.fixture() def sagemaker_session(): boto_mock = Mock(name='boto_session')