Skip to content

Add support for intermediate output to a local directory in local mode. #524

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Dec 8, 2018
1 change: 1 addition & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ CHANGELOG

* doc-fix: Change ``distribution`` to ``distributions``
* bug-fix: Increase docker-compose http timeout and health check timeout to 120.
* feature: Local Mode: Add support for intermediate output to a local directory.

1.16.1.post1
============
Expand Down
14 changes: 12 additions & 2 deletions src/sagemaker/local/image.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,8 @@ def train(self, input_data_config, output_data_config, hyperparameters, job_name
os.mkdir(shared_dir)

data_dir = self._create_tmp_folder()
volumes = self._prepare_training_volumes(data_dir, input_data_config, hyperparameters)
volumes = self._prepare_training_volumes(data_dir, input_data_config, output_data_config,
hyperparameters)

# Create the configuration files for each container that we will create
# Each container will map the additional local volumes (if any).
Expand Down Expand Up @@ -281,7 +282,8 @@ def write_config_files(self, host, hyperparameters, input_data_config):
_write_json_file(os.path.join(config_path, 'resourceconfig.json'), resource_config)
_write_json_file(os.path.join(config_path, 'inputdataconfig.json'), json_input_data_config)

def _prepare_training_volumes(self, data_dir, input_data_config, hyperparameters):
def _prepare_training_volumes(self, data_dir, input_data_config, output_data_config,
hyperparameters):
shared_dir = os.path.join(self.container_root, 'shared')
model_dir = os.path.join(self.container_root, 'model')
volumes = []
Expand Down Expand Up @@ -309,6 +311,14 @@ def _prepare_training_volumes(self, data_dir, input_data_config, hyperparameters
# Also mount a directory that all the containers can access.
volumes.append(_Volume(shared_dir, '/opt/ml/shared'))

parsed_uri = urlparse(output_data_config['S3OutputPath'])
if parsed_uri.scheme == 'file' \
and sagemaker.rl.estimator.SAGEMAKER_OUTPUT_LOCATION in hyperparameters:
intermediate_dir = os.path.join(parsed_uri.path, 'output', 'intermediate')
if not os.path.exists(intermediate_dir):
os.makedirs(intermediate_dir)
volumes.append(_Volume(intermediate_dir, '/opt/ml/output/intermediate'))

return volumes

def _prepare_serving_volumes(self, model_location):
Expand Down
35 changes: 35 additions & 0 deletions tests/unit/test_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -396,6 +396,41 @@ def test_train_local_code(tmpdir, sagemaker_session):
assert '%s:/opt/ml/shared' % shared_folder_path in volumes


@patch('sagemaker.local.local_session.LocalSession', Mock())
@patch('sagemaker.local.image._stream_output', Mock())
@patch('sagemaker.local.image._SageMakerContainer._cleanup', Mock())
@patch('sagemaker.local.data.get_data_source_instance', Mock())
@patch('subprocess.Popen', Mock())
def test_train_local_intermediate_output(tmpdir, sagemaker_session):
    """Check that local training mounts the intermediate output directory.

    Runs ``_SageMakerContainer.train`` with a ``file://`` ``S3OutputPath`` and a
    ``sagemaker_s3_output`` hyperparameter, then reads the generated
    ``docker-compose.yaml`` and asserts that every host maps
    ``<output_path>/output/intermediate`` to ``/opt/ml/output/intermediate``.

    Args:
        tmpdir: pytest temporary-directory fixture; provides the container
            root, the data directory and the customer output directory.
        sagemaker_session: session fixture handed to the container.
    """
    # _create_tmp_folder is patched to hand back these two directories in order.
    directories = [str(tmpdir.mkdir('container-root')), str(tmpdir.mkdir('data'))]
    with patch('sagemaker.local.image._SageMakerContainer._create_tmp_folder',
               side_effect=directories):
        instance_count = 2
        image = 'my-image'
        sagemaker_container = _SageMakerContainer('local', instance_count, image,
                                                  sagemaker_session=sagemaker_session)

        output_path = str(tmpdir.mkdir('customer_intermediate_output'))
        output_data_config = {'S3OutputPath': 'file://%s' % output_path}
        # NOTE(review): this key is presumably the value of
        # sagemaker.rl.estimator.SAGEMAKER_OUTPUT_LOCATION -- confirm.
        hyperparameters = {'sagemaker_s3_output': output_path}

        sagemaker_container.train(
            INPUT_DATA_CONFIG, output_data_config, hyperparameters, TRAINING_JOB_NAME)

        docker_compose_file = os.path.join(sagemaker_container.container_root,
                                           'docker-compose.yaml')
        # Built from the same path components the implementation joins.
        intermediate_folder_path = os.path.join(output_path, 'output', 'intermediate')
        expected_volume = '%s:/opt/ml/output/intermediate' % intermediate_folder_path

        # safe_load: yaml.load() without an explicit Loader is deprecated and is
        # rejected outright by PyYAML >= 6. The compose file is plain data, so
        # the safe loader is sufficient. Parse first, assert after closing.
        with open(docker_compose_file, 'r') as f:
            config = yaml.safe_load(f)

        assert len(config['services']) == instance_count
        for h in sagemaker_container.hosts:
            service = config['services'][h]
            assert service['image'] == image
            assert service['command'] == 'train'
            assert expected_volume in service['volumes']


def test_container_has_gpu_support(tmpdir, sagemaker_session):
instance_count = 1
image = 'my-image'
Expand Down