Skip to content

fix FileNotFoundError for entry_point without source_dir #510

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Nov 22, 2018
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 15 additions & 13 deletions src/sagemaker/fw_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,27 +131,29 @@ def tar_and_upload_dir(session, bucket, s3_key_prefix, script, directory, depend
Returns:
sagemaker.fw_utils.UserCode: An object with the S3 bucket and key (S3 prefix) and script name.
"""
dependencies = dependencies or []
key = '%s/sourcedir.tar.gz' % s3_key_prefix

if directory and directory.lower().startswith('s3://'):
return UploadedCode(s3_prefix=directory, script_name=os.path.basename(script))
else:
tmp = tempfile.mkdtemp()

try:
source_files = _list_files_to_compress(script, directory) + dependencies
tar_file = sagemaker.utils.create_tar_file(source_files, os.path.join(tmp, _TAR_SOURCE_FILENAME))
script_name = script if directory else os.path.basename(script)
dependencies = dependencies or []
key = '%s/sourcedir.tar.gz' % s3_key_prefix
tmp = tempfile.mkdtemp()

session.resource('s3').Object(bucket, key).upload_file(tar_file)
finally:
shutil.rmtree(tmp)
try:
source_files = _list_files_to_compress(script, directory) + dependencies
tar_file = sagemaker.utils.create_tar_file(source_files, os.path.join(tmp, _TAR_SOURCE_FILENAME))

script_name = script if directory else os.path.basename(script)
return UploadedCode(s3_prefix='s3://%s/%s' % (bucket, key), script_name=script_name)
session.resource('s3').Object(bucket, key).upload_file(tar_file)
finally:
shutil.rmtree(tmp)

return UploadedCode(s3_prefix='s3://%s/%s' % (bucket, key), script_name=script_name)


def _list_files_to_compress(script, directory):
if directory is None:
return [script]

basedir = directory if directory else os.path.dirname(script)
return [os.path.join(basedir, name) for name in os.listdir(basedir)]

Expand Down
38 changes: 37 additions & 1 deletion tests/unit/test_fw_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import pytest
from mock import Mock, patch

from contextlib import contextmanager
from sagemaker import fw_utils
from sagemaker.utils import name_from_image

Expand All @@ -30,6 +31,14 @@
TIMESTAMP = '2017-10-10-14-14-15'


@contextmanager
def cd(path):
    """Temporarily change the process working directory to ``path``.

    The working directory that was active on entry is restored on exit,
    including when the managed block raises, so a failing test cannot leak a
    changed working directory into the tests that run after it.

    Args:
        path (str): Directory to switch into for the duration of the block.
    """
    old_dir = os.getcwd()
    os.chdir(path)
    try:
        yield
    finally:
        # Runs on both normal exit and exceptions raised inside the block.
        os.chdir(old_dir)


@pytest.fixture()
def sagemaker_session():
boto_mock = Mock(name='boto_session', region_name=REGION)
Expand Down Expand Up @@ -132,7 +141,7 @@ def test_validate_source_dir_file_not_in_dir():


def test_tar_and_upload_dir_not_s3(sagemaker_session):
bucket = 'mybucker'
bucket = 'mybucket'
s3_key_prefix = 'something/source'
script = os.path.basename(__file__)
directory = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
Expand Down Expand Up @@ -166,6 +175,33 @@ def test_tar_and_upload_dir_no_directory(sagemaker_session, tmpdir):
assert {'/train.py'} == list_source_dir_files(sagemaker_session, tmpdir)


def test_tar_and_upload_dir_no_directory_only_entrypoint(sagemaker_session, tmpdir):
    """Without a source directory, only the entry point is archived, not its siblings."""
    root = file_tree(tmpdir, ['train.py', 'not_me.py'])
    script_path = os.path.join(root, 'train.py')
    expected = fw_utils.UploadedCode(s3_prefix='s3://bucket/prefix/sourcedir.tar.gz',
                                     script_name='train.py')

    # rmtree is stubbed out while uploading - presumably so the temporary
    # archive survives for the listing check below; confirm against
    # list_source_dir_files.
    with patch('shutil.rmtree'):
        uploaded = fw_utils.tar_and_upload_dir(sagemaker_session, 'bucket', 'prefix', script_path, None)

    assert uploaded == expected
    assert {'/train.py'} == list_source_dir_files(sagemaker_session, tmpdir)


def test_tar_and_upload_dir_no_directory_bare_filename(sagemaker_session, tmpdir):
    """A bare entry-point filename (no directory part) is resolved against the cwd."""
    root = file_tree(tmpdir, ['train.py'])
    script_name = 'train.py'
    expected = fw_utils.UploadedCode(s3_prefix='s3://bucket/prefix/sourcedir.tar.gz',
                                     script_name='train.py')

    # Run from inside the generated tree so the bare filename can be found.
    # rmtree is stubbed out - presumably so the temporary archive survives for
    # the listing check below; confirm against list_source_dir_files.
    with patch('shutil.rmtree'), cd(root):
        uploaded = fw_utils.tar_and_upload_dir(sagemaker_session, 'bucket', 'prefix', script_name, None)

    assert uploaded == expected
    assert {'/train.py'} == list_source_dir_files(sagemaker_session, tmpdir)


def test_tar_and_upload_dir_with_directory(sagemaker_session, tmpdir):
file_tree(tmpdir, ['src-dir/train.py'])
source_dir = os.path.join(str(tmpdir), 'src-dir')
Expand Down