Skip to content

Commit 09f76e3

Browse files
authored
Merge branch 'master' into fix-localmode
2 parents a8f160c + 9d3c218 commit 09f76e3

File tree

5 files changed

+39
-5
lines changed

5 files changed

+39
-5
lines changed

CHANGELOG.rst

+1
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@ CHANGELOG
88
* feature: Local Mode: Add support for Batch Inference
99
* feature: Add timestamp to secondary status in training job output
1010
* bug-fix: Local Mode: Set correct default values for additional_volumes and additional_env_vars
11+
* enhancement: Local Mode: support nvidia-docker2 natively
1112

1213
1.11.2
1314
======

setup.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -44,8 +44,8 @@ def read(fname):
4444
],
4545

4646
# Declare minimal set for installation
47-
install_requires=['boto3>=1.4.8', 'numpy>=1.9.0', 'protobuf>=3.1', 'scipy>=0.19.0', 'urllib3>=1.2',
48-
'PyYAML>=3.2', 'protobuf3-to-dict>=0.1.5'],
47+
install_requires=['boto3>=1.4.8', 'numpy>=1.9.0', 'protobuf>=3.1', 'scipy>=0.19.0', 'urllib3 >=1.21, <1.23',
48+
'PyYAML>=3.2', 'protobuf3-to-dict>=0.1.5', 'docker-compose>=1.21.0'],
4949

5050
extras_require={
5151
'test': ['tox', 'flake8', 'pytest', 'pytest-cov', 'pytest-xdist',

src/sagemaker/local/image.py

+7-2
Original file line number | Diff line number | Diff line change
@@ -362,8 +362,8 @@ def _generate_compose_file(self, command, additional_volumes=None, additional_en
362362
}
363363

364364
content = {
365-
# Some legacy hosts only support the 2.1 format.
366-
'version': '2.1',
365+
# Use version 2.3 as a minimum so that we can specify the runtime
366+
'version': '2.3',
367367
'services': services,
368368
'networks': {
369369
'sagemaker-local': {'name': 'sagemaker-local'}
@@ -415,6 +415,11 @@ def _create_docker_host(self, host, environment, optml_subdirs, command, volumes
415415
}
416416
}
417417

418+
# for GPU support pass in nvidia as the runtime, this is equivalent
419+
# to setting --runtime=nvidia in the docker commandline.
420+
if self.instance_type == 'local_gpu':
421+
host_config['runtime'] = 'nvidia'
422+
418423
if command == 'serve':
419424
serving_port = sagemaker.utils.get_config_value('local.serving_port',
420425
self.sagemaker_session.config) or 8080

tests/integ/test_local_mode.py

+8-1
Original file line number | Diff line number | Diff line change
@@ -368,8 +368,9 @@ def test_mxnet_local_data_local_script():
368368
fcntl.lockf(local_mode_lock, fcntl.LOCK_UN)
369369

370370

371-
@pytest.mark.continuous_testing
372371
def test_local_transform_mxnet(sagemaker_local_session, tmpdir):
372+
local_mode_lock_fd = open(LOCK_PATH, 'w')
373+
local_mode_lock = local_mode_lock_fd.fileno()
373374
data_path = os.path.join(DATA_DIR, 'mxnet_mnist')
374375
script_path = os.path.join(data_path, 'mnist.py')
375376

@@ -392,7 +393,13 @@ def test_local_transform_mxnet(sagemaker_local_session, tmpdir):
392393
output_path = 'file://%s' % (str(tmpdir))
393394
transformer = mx.transformer(1, 'local', assemble_with='Line', max_payload=1,
394395
strategy='SingleRecord', output_path=output_path)
396+
397+
# Since Local Mode uses the same port for serving, we need a lock in order
398+
# to allow concurrent test execution.
399+
fcntl.lockf(local_mode_lock, fcntl.LOCK_EX)
395400
transformer.transform(transform_input, content_type='text/csv', split_type='Line')
396401
transformer.wait()
402+
time.sleep(5)
403+
fcntl.lockf(local_mode_lock, fcntl.LOCK_UN)
397404

398405
assert os.path.exists(os.path.join(str(tmpdir), 'data.csv.out'))

tests/unit/test_image.py

+21
Original file line number | Diff line number | Diff line change
@@ -334,6 +334,27 @@ def test_train_local_code(download_folder, _cleanup, popen, _stream_output,
334334
assert '%s:/opt/ml/shared' % shared_folder_path in volumes
335335

336336

337+
def test_container_has_gpu_support(tmpdir, sagemaker_session):
338+
instance_count = 1
339+
image = 'my-image'
340+
sagemaker_container = _SageMakerContainer('local_gpu', instance_count, image,
341+
sagemaker_session=sagemaker_session)
342+
343+
docker_host = sagemaker_container._create_docker_host('host-1', {}, set(), 'train', [])
344+
assert 'runtime' in docker_host
345+
assert docker_host['runtime'] == 'nvidia'
346+
347+
348+
def test_container_does_not_enable_nvidia_docker_for_cpu_containers(tmpdir, sagemaker_session):
349+
instance_count = 1
350+
image = 'my-image'
351+
sagemaker_container = _SageMakerContainer('local', instance_count, image,
352+
sagemaker_session=sagemaker_session)
353+
354+
docker_host = sagemaker_container._create_docker_host('host-1', {}, set(), 'train', [])
355+
assert 'runtime' not in docker_host
356+
357+
337358
@patch('sagemaker.local.image._HostingContainer.run')
338359
@patch('shutil.copy')
339360
@patch('shutil.copytree')

0 commit comments

Comments
 (0)