diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 62a1a1b9cc..f1d6df1b7a 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -7,6 +7,7 @@ CHANGELOG * doc-fix: Change ``distribution`` to ``distributions`` * bug-fix: Increase docker-compose http timeout and health check timeout to 120. +* feature: Local Mode: Add support for intermediate output to a local directory. 1.16.1.post1 ============ diff --git a/src/sagemaker/local/image.py b/src/sagemaker/local/image.py index 77ecffc1e5..30b56ce885 100644 --- a/src/sagemaker/local/image.py +++ b/src/sagemaker/local/image.py @@ -104,7 +104,8 @@ def train(self, input_data_config, output_data_config, hyperparameters, job_name os.mkdir(shared_dir) data_dir = self._create_tmp_folder() - volumes = self._prepare_training_volumes(data_dir, input_data_config, hyperparameters) + volumes = self._prepare_training_volumes(data_dir, input_data_config, output_data_config, + hyperparameters) # Create the configuration files for each container that we will create # Each container will map the additional local volumes (if any). @@ -281,7 +282,8 @@ def write_config_files(self, host, hyperparameters, input_data_config): _write_json_file(os.path.join(config_path, 'resourceconfig.json'), resource_config) _write_json_file(os.path.join(config_path, 'inputdataconfig.json'), json_input_data_config) - def _prepare_training_volumes(self, data_dir, input_data_config, hyperparameters): + def _prepare_training_volumes(self, data_dir, input_data_config, output_data_config, + hyperparameters): shared_dir = os.path.join(self.container_root, 'shared') model_dir = os.path.join(self.container_root, 'model') volumes = [] @@ -309,6 +311,14 @@ def _prepare_training_volumes(self, data_dir, input_data_config, hyperparameters # Also mount a directory that all the containers can access. volumes.append(_Volume(shared_dir, '/opt/ml/shared')) + parsed_uri = urlparse(output_data_config['S3OutputPath']) + if parsed_uri.scheme == 'file' \ + and sagemaker.rl.estimator.SAGEMAKER_OUTPUT_LOCATION in hyperparameters: + intermediate_dir = os.path.join(parsed_uri.path, 'output', 'intermediate') + if not os.path.exists(intermediate_dir): + os.makedirs(intermediate_dir) + volumes.append(_Volume(intermediate_dir, '/opt/ml/output/intermediate')) + return volumes def _prepare_serving_volumes(self, model_location): diff --git a/tests/unit/test_image.py b/tests/unit/test_image.py index 5a2cbb39fe..a5306c0d75 100644 --- a/tests/unit/test_image.py +++ b/tests/unit/test_image.py @@ -396,6 +396,41 @@ def test_train_local_code(tmpdir, sagemaker_session): assert '%s:/opt/ml/shared' % shared_folder_path in volumes +@patch('sagemaker.local.local_session.LocalSession', Mock()) +@patch('sagemaker.local.image._stream_output', Mock()) +@patch('sagemaker.local.image._SageMakerContainer._cleanup', Mock()) +@patch('sagemaker.local.data.get_data_source_instance', Mock()) +@patch('subprocess.Popen', Mock()) +def test_train_local_intermediate_output(tmpdir, sagemaker_session): + directories = [str(tmpdir.mkdir('container-root')), str(tmpdir.mkdir('data'))] + with patch('sagemaker.local.image._SageMakerContainer._create_tmp_folder', + side_effect=directories): + instance_count = 2 + image = 'my-image' + sagemaker_container = _SageMakerContainer('local', instance_count, image, + sagemaker_session=sagemaker_session) + + output_path = str(tmpdir.mkdir('customer_intermediate_output')) + output_data_config = {'S3OutputPath': 'file://%s' % output_path} + hyperparameters = {'sagemaker_s3_output': output_path} + + sagemaker_container.train( + INPUT_DATA_CONFIG, output_data_config, hyperparameters, TRAINING_JOB_NAME) + + docker_compose_file = os.path.join(sagemaker_container.container_root, + 'docker-compose.yaml') + intermediate_folder_path = os.path.join(output_path, 'output/intermediate') + + with open(docker_compose_file, 'r') as f: + config = yaml.load(f) + assert len(config['services']) == instance_count + for h in sagemaker_container.hosts: + assert config['services'][h]['image'] == image + assert config['services'][h]['command'] == 'train' + volumes = config['services'][h]['volumes'] + assert '%s:/opt/ml/output/intermediate' % intermediate_folder_path in volumes + + def test_container_has_gpu_support(tmpdir, sagemaker_session): instance_count = 1 image = 'my-image'