From 1124f08de113f82489c3798fc9beaf3c0b5b3fe6 Mon Sep 17 00:00:00 2001 From: Nadia Yakimakha <32335935+nadiaya@users.noreply.github.com> Date: Sun, 18 Nov 2018 02:02:34 -0800 Subject: [PATCH 1/5] Add local mode support for intermediate output functionality. --- src/sagemaker/estimator.py | 3 ++- src/sagemaker/local/image.py | 18 ++++++++++++++++-- src/sagemaker/model.py | 1 + 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/src/sagemaker/estimator.py b/src/sagemaker/estimator.py index 920ef814ba..61fd5fd46a 100644 --- a/src/sagemaker/estimator.py +++ b/src/sagemaker/estimator.py @@ -28,7 +28,8 @@ from sagemaker.local import LocalSession from sagemaker.model import Model from sagemaker.model import (SCRIPT_PARAM_NAME, DIR_PARAM_NAME, CLOUDWATCH_METRICS_PARAM_NAME, - CONTAINER_LOG_LEVEL_PARAM_NAME, JOB_NAME_PARAM_NAME, SAGEMAKER_REGION_PARAM_NAME) + CONTAINER_LOG_LEVEL_PARAM_NAME, JOB_NAME_PARAM_NAME, + SAGEMAKER_REGION_PARAM_NAME, SAGEMAKER_OUTPUT_LOCATION) from sagemaker.predictor import RealTimePredictor from sagemaker.session import Session from sagemaker.session import s3_input diff --git a/src/sagemaker/local/image.py b/src/sagemaker/local/image.py index 56196639d7..8f2d2a5481 100644 --- a/src/sagemaker/local/image.py +++ b/src/sagemaker/local/image.py @@ -101,7 +101,8 @@ def train(self, input_data_config, output_data_config, hyperparameters, job_name os.mkdir(shared_dir) data_dir = self._create_tmp_folder() - volumes = self._prepare_training_volumes(data_dir, input_data_config, hyperparameters) + volumes = self._prepare_training_volumes(data_dir, input_data_config, output_data_config, + hyperparameters) # Create the configuration files for each container that we will create # Each container will map the additional local volumes (if any). @@ -275,7 +276,8 @@ def write_config_files(self, host, hyperparameters, input_data_config): _write_json_file(os.path.join(config_path, 'resourceconfig.json'), resource_config) _write_json_file(os.path.join(config_path, 'inputdataconfig.json'), json_input_data_config) - def _prepare_training_volumes(self, data_dir, input_data_config, hyperparameters): + def _prepare_training_volumes(self, data_dir, input_data_config, output_data_config, + hyperparameters): shared_dir = os.path.join(self.container_root, 'shared') model_dir = os.path.join(self.container_root, 'model') volumes = [] @@ -303,6 +305,18 @@ def _prepare_training_volumes(self, data_dir, input_data_config, hyperparameters # Also mount a directory that all the containers can access. volumes.append(_Volume(shared_dir, '/opt/ml/shared')) + parsed_uri = urlparse(output_data_config['S3OutputPath']) + print("output_data_config['S3OutputPath']: {}".format(output_data_config['S3OutputPath'])) + print('sagemaker.estimator.SAGEMAKER_OUTPUT_LOCATION in hyperparameters: {}'.format( + sagemaker.estimator.SAGEMAKER_OUTPUT_LOCATION in hyperparameters + )) + if parsed_uri.scheme == 'file' \ + and sagemaker.estimator.SAGEMAKER_OUTPUT_LOCATION in hyperparameters: + intermediate_dir = os.path.join(parsed_uri.path, 'output', 'intermediate') + if not os.path.exists(intermediate_dir): + os.makedirs(intermediate_dir) + volumes.append(_Volume(intermediate_dir, '/opt/ml/output/intermediate')) + return volumes def _prepare_serving_volumes(self, model_location): diff --git a/src/sagemaker/model.py b/src/sagemaker/model.py index 972fdb7961..3ce81d2e2e 100644 --- a/src/sagemaker/model.py +++ b/src/sagemaker/model.py @@ -117,6 +117,7 @@ def deploy(self, initial_instance_count, instance_type, endpoint_name=None, tags JOB_NAME_PARAM_NAME = 'sagemaker_job_name' MODEL_SERVER_WORKERS_PARAM_NAME = 'sagemaker_model_server_workers' SAGEMAKER_REGION_PARAM_NAME = 'sagemaker_region' +SAGEMAKER_OUTPUT_LOCATION = 'sagemaker_s3_output' class FrameworkModel(Model): From 0d390769d472f823ae074c8bdb3fee1c6f577b0d Mon Sep 17 00:00:00 2001 From: Nadia Yakimakha <32335935+nadiaya@users.noreply.github.com> Date: Tue, 4 Dec 2018 23:07:00 -0800 Subject: [PATCH 2/5] Add unit tests for intermediate output folder. --- src/sagemaker/local/image.py | 4 ---- tests/unit/test_image.py | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 4 deletions(-) diff --git a/src/sagemaker/local/image.py b/src/sagemaker/local/image.py index 86c953ebf8..0e2fa98425 100644 --- a/src/sagemaker/local/image.py +++ b/src/sagemaker/local/image.py @@ -309,10 +309,6 @@ def _prepare_training_volumes(self, data_dir, input_data_config, output_data_con volumes.append(_Volume(shared_dir, '/opt/ml/shared')) parsed_uri = urlparse(output_data_config['S3OutputPath']) - print("output_data_config['S3OutputPath']: {}".format(output_data_config['S3OutputPath'])) - print('sagemaker.estimator.SAGEMAKER_OUTPUT_LOCATION in hyperparameters: {}'.format( - sagemaker.estimator.SAGEMAKER_OUTPUT_LOCATION in hyperparameters - )) if parsed_uri.scheme == 'file' \ and sagemaker.estimator.SAGEMAKER_OUTPUT_LOCATION in hyperparameters: intermediate_dir = os.path.join(parsed_uri.path, 'output', 'intermediate') diff --git a/tests/unit/test_image.py b/tests/unit/test_image.py index 5a2cbb39fe..a5306c0d75 100644 --- a/tests/unit/test_image.py +++ b/tests/unit/test_image.py @@ -396,6 +396,41 @@ def test_train_local_code(tmpdir, sagemaker_session): assert '%s:/opt/ml/shared' % shared_folder_path in volumes +@patch('sagemaker.local.local_session.LocalSession', Mock()) +@patch('sagemaker.local.image._stream_output', Mock()) +@patch('sagemaker.local.image._SageMakerContainer._cleanup', Mock()) +@patch('sagemaker.local.data.get_data_source_instance', Mock()) +@patch('subprocess.Popen', Mock()) +def test_train_local_intermediate_output(tmpdir, sagemaker_session): + directories = [str(tmpdir.mkdir('container-root')), str(tmpdir.mkdir('data'))] + with patch('sagemaker.local.image._SageMakerContainer._create_tmp_folder', + side_effect=directories): + instance_count = 2 + image = 'my-image' + sagemaker_container = _SageMakerContainer('local', instance_count, image, + sagemaker_session=sagemaker_session) + + output_path = str(tmpdir.mkdir('customer_intermediate_output')) + output_data_config = {'S3OutputPath': 'file://%s' % output_path} + hyperparameters = {'sagemaker_s3_output': output_path} + + sagemaker_container.train( + INPUT_DATA_CONFIG, output_data_config, hyperparameters, TRAINING_JOB_NAME) + + docker_compose_file = os.path.join(sagemaker_container.container_root, + 'docker-compose.yaml') + intermediate_folder_path = os.path.join(output_path, 'output/intermediate') + + with open(docker_compose_file, 'r') as f: + config = yaml.load(f) + assert len(config['services']) == instance_count + for h in sagemaker_container.hosts: + assert config['services'][h]['image'] == image + assert config['services'][h]['command'] == 'train' + volumes = config['services'][h]['volumes'] + assert '%s:/opt/ml/output/intermediate' % intermediate_folder_path in volumes + + def test_container_has_gpu_support(tmpdir, sagemaker_session): instance_count = 1 image = 'my-image' From c9b063429549c5d53f07be196c6de96b2f82d779 Mon Sep 17 00:00:00 2001 From: Nadia Yakimakha <32335935+nadiaya@users.noreply.github.com> Date: Tue, 4 Dec 2018 23:14:31 -0800 Subject: [PATCH 3/5] Use the right SAGEMAKER_OUTPUT_LOCATION constant. --- src/sagemaker/estimator.py | 3 +-- src/sagemaker/local/image.py | 2 +- src/sagemaker/model.py | 1 - src/sagemaker/rl/__init__.py | 5 +++-- 4 files changed, 5 insertions(+), 6 deletions(-) diff --git a/src/sagemaker/estimator.py b/src/sagemaker/estimator.py index 117331b458..95865e8761 100644 --- a/src/sagemaker/estimator.py +++ b/src/sagemaker/estimator.py @@ -29,8 +29,7 @@ from sagemaker.local import LocalSession from sagemaker.model import Model, NEO_ALLOWED_TARGET_INSTANCE_FAMILY, NEO_ALLOWED_FRAMEWORKS from sagemaker.model import (SCRIPT_PARAM_NAME, DIR_PARAM_NAME, CLOUDWATCH_METRICS_PARAM_NAME, - CONTAINER_LOG_LEVEL_PARAM_NAME, JOB_NAME_PARAM_NAME, - SAGEMAKER_REGION_PARAM_NAME, SAGEMAKER_OUTPUT_LOCATION) + CONTAINER_LOG_LEVEL_PARAM_NAME, JOB_NAME_PARAM_NAME, SAGEMAKER_REGION_PARAM_NAME) from sagemaker.predictor import RealTimePredictor from sagemaker.session import Session from sagemaker.session import s3_input diff --git a/src/sagemaker/local/image.py b/src/sagemaker/local/image.py index 0e2fa98425..b276bcb7a9 100644 --- a/src/sagemaker/local/image.py +++ b/src/sagemaker/local/image.py @@ -310,7 +310,7 @@ def _prepare_training_volumes(self, data_dir, input_data_config, output_data_con parsed_uri = urlparse(output_data_config['S3OutputPath']) if parsed_uri.scheme == 'file' \ - and sagemaker.estimator.SAGEMAKER_OUTPUT_LOCATION in hyperparameters: + and sagemaker.rl.SAGEMAKER_OUTPUT_LOCATION in hyperparameters: intermediate_dir = os.path.join(parsed_uri.path, 'output', 'intermediate') if not os.path.exists(intermediate_dir): os.makedirs(intermediate_dir) diff --git a/src/sagemaker/model.py b/src/sagemaker/model.py index 9572b834c0..d7b62f31e3 100644 --- a/src/sagemaker/model.py +++ b/src/sagemaker/model.py @@ -257,7 +257,6 @@ def deploy(self, initial_instance_count, instance_type, accelerator_type=None, e JOB_NAME_PARAM_NAME = 'sagemaker_job_name' MODEL_SERVER_WORKERS_PARAM_NAME = 'sagemaker_model_server_workers' SAGEMAKER_REGION_PARAM_NAME = 'sagemaker_region' -SAGEMAKER_OUTPUT_LOCATION = 'sagemaker_s3_output' class FrameworkModel(Model): diff --git a/src/sagemaker/rl/__init__.py b/src/sagemaker/rl/__init__.py index 62a0c1e934..3fdca8be3b 100644 --- a/src/sagemaker/rl/__init__.py +++ b/src/sagemaker/rl/__init__.py @@ -13,6 +13,7 @@ from __future__ import absolute_import from sagemaker.rl.estimator import (RLEstimator, RLToolkit, RLFramework, - TOOLKIT_FRAMEWORK_VERSION_MAP) + TOOLKIT_FRAMEWORK_VERSION_MAP, SAGEMAKER_OUTPUT_LOCATION) -__all__ = [RLEstimator, RLToolkit, RLFramework, TOOLKIT_FRAMEWORK_VERSION_MAP] +__all__ = [RLEstimator, RLToolkit, RLFramework, + TOOLKIT_FRAMEWORK_VERSION_MAP, SAGEMAKER_OUTPUT_LOCATION] From e749c90d0780b4a50defda98284c7d94768801c9 Mon Sep 17 00:00:00 2001 From: Nadia Yakimakha <32335935+nadiaya@users.noreply.github.com> Date: Tue, 4 Dec 2018 23:17:57 -0800 Subject: [PATCH 4/5] Update changelog. --- CHANGELOG.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index d21d3f80ee..a65d1729db 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -2,6 +2,11 @@ CHANGELOG ========= +1.16.2.dev +========== + +* feature: Local Mode: Add support for intermediate output to a local directory. + 1.16.1.post1 ============ From d8d1d1c204ac1f6e4190eaa0df0be7773bb5d2be Mon Sep 17 00:00:00 2001 From: Nadia Yakimakha <32335935+nadiaya@users.noreply.github.com> Date: Wed, 5 Dec 2018 22:32:49 -0800 Subject: [PATCH 5/5] Fix pylint. --- src/sagemaker/local/image.py | 2 +- src/sagemaker/rl/__init__.py | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/sagemaker/local/image.py b/src/sagemaker/local/image.py index b276bcb7a9..3aae4865ae 100644 --- a/src/sagemaker/local/image.py +++ b/src/sagemaker/local/image.py @@ -310,7 +310,7 @@ def _prepare_training_volumes(self, data_dir, input_data_config, output_data_con parsed_uri = urlparse(output_data_config['S3OutputPath']) if parsed_uri.scheme == 'file' \ - and sagemaker.rl.SAGEMAKER_OUTPUT_LOCATION in hyperparameters: + and sagemaker.rl.estimator.SAGEMAKER_OUTPUT_LOCATION in hyperparameters: intermediate_dir = os.path.join(parsed_uri.path, 'output', 'intermediate') if not os.path.exists(intermediate_dir): os.makedirs(intermediate_dir) diff --git a/src/sagemaker/rl/__init__.py b/src/sagemaker/rl/__init__.py index 3fdca8be3b..62a0c1e934 100644 --- a/src/sagemaker/rl/__init__.py +++ b/src/sagemaker/rl/__init__.py @@ -13,7 +13,6 @@ from __future__ import absolute_import from sagemaker.rl.estimator import (RLEstimator, RLToolkit, RLFramework, - TOOLKIT_FRAMEWORK_VERSION_MAP, SAGEMAKER_OUTPUT_LOCATION) + TOOLKIT_FRAMEWORK_VERSION_MAP) -__all__ = [RLEstimator, RLToolkit, RLFramework, - TOOLKIT_FRAMEWORK_VERSION_MAP, SAGEMAKER_OUTPUT_LOCATION] +__all__ = [RLEstimator, RLToolkit, RLFramework, TOOLKIT_FRAMEWORK_VERSION_MAP]