fix FileNotFoundError for entry_point without source_dir #510

Merged
merged 5 commits on Nov 22, 2018

7 changes: 7 additions & 0 deletions CHANGELOG.rst
@@ -2,6 +2,13 @@
CHANGELOG
=========

1.15.2
======

* bug-fix: Fix FileNotFoundError for entry_point without source_dir
* doc-fix: Add missing feature 1.5.0 in change log
* doc-fix: Add README for airflow

1.15.1
======

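For context, the bug-fix entry above covers the case where a user passes only an entry_point script and no source_dir to a framework estimator. A hypothetical sketch of that usage follows; the estimator class, role, and instance settings are illustrative placeholders and are not taken from this PR:

# Illustrative sketch only (SDK 1.x style); class and arguments are assumptions, not part of this diff.
from sagemaker.mxnet import MXNet

estimator = MXNet(entry_point='train.py',  # bare script name, no source_dir
                  role='SageMakerRole',    # placeholder role name or ARN
                  train_instance_count=1,
                  train_instance_type='ml.m4.xlarge',
                  framework_version='1.3.0')

# Before this fix, packaging the source code for training could raise
# FileNotFoundError in this situation; see the fw_utils.py changes below.
# estimator.fit('s3://my-bucket/training-data')
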
2 changes: 1 addition & 1 deletion doc/conf.py
@@ -32,7 +32,7 @@ def __getattr__(cls, name):
'numpy', 'scipy', 'scipy.sparse']
sys.modules.update((mod_name, Mock()) for mod_name in MOCK_MODULES)

version = '1.15.1'
version = '1.15.2'
project = u'sagemaker'

# Add any Sphinx extension module names here, as strings. They can be extensions
2 changes: 1 addition & 1 deletion src/sagemaker/__init__.py
@@ -37,4 +37,4 @@
from sagemaker.session import s3_input # noqa: F401
from sagemaker.session import get_execution_role # noqa: F401

__version__ = '1.15.1'
__version__ = '1.15.2'
59 changes: 35 additions & 24 deletions src/sagemaker/fw_utils.py
@@ -12,15 +12,15 @@
# language governing permissions and limitations under the License.
from __future__ import absolute_import

from collections import namedtuple

import os
import re
import sagemaker.utils
import shutil
import tempfile
from collections import namedtuple
from six.moves.urllib.parse import urlparse

import sagemaker.utils

_TAR_SOURCE_FILENAME = 'source.tar.gz'

UploadedCode = namedtuple('UserCode', ['s3_prefix', 'script_name'])
@@ -112,46 +112,57 @@ def validate_source_dir(script, directory):


def tar_and_upload_dir(session, bucket, s3_key_prefix, script, directory, dependencies=None):
"""Pack and upload source files to S3 only if directory is empty or local.
"""Package source files and upload a compress tar file to S3. The S3 location will be
``s3://<bucket>/s3_key_prefix/sourcedir.tar.gz``.

If directory is an S3 URI, an UploadedCode object will be returned, but nothing will be
uploaded to S3 (this allows reuse of code already in S3).

If directory is None, the script will be added to the archive at ``./<basename of script>``.

Note:
If the directory points to S3 no action is taken.
If directory is not None, the (recursive) contents of the directory will be added to
the archive. directory is treated as the base path of the archive, and the script name is
assumed to be a filename or relative path inside the directory.

Args:
session (boto3.Session): Boto session used to access S3.
bucket (str): S3 bucket to which the compressed file is uploaded.
s3_key_prefix (str): Prefix for the S3 key.
script (str): Script filename.
directory (str or None): Directory containing the source file. If it starts with
"s3://", no action is taken.
dependencies (List[str]): A list of paths to directories (absolute or relative)
script (str): Script filename or path.
directory (str): Optional. Directory containing the source file. If it starts with "s3://",
no action is taken.
dependencies (List[str]): Optional. A list of paths to directories (absolute or relative)
containing additional libraries that will be copied into
/opt/ml/lib

Returns:
sagemaker.fw_utils.UserCode: An object with the S3 bucket and key (S3 prefix) and script name.
sagemaker.fw_utils.UserCode: An object with the S3 bucket and key (S3 prefix) and
script name.
"""
dependencies = dependencies or []
key = '%s/sourcedir.tar.gz' % s3_key_prefix

if directory and directory.lower().startswith('s3://'):
return UploadedCode(s3_prefix=directory, script_name=os.path.basename(script))
else:
tmp = tempfile.mkdtemp()

try:
source_files = _list_files_to_compress(script, directory) + dependencies
tar_file = sagemaker.utils.create_tar_file(source_files, os.path.join(tmp, _TAR_SOURCE_FILENAME))
script_name = script if directory else os.path.basename(script)
dependencies = dependencies or []
key = '%s/sourcedir.tar.gz' % s3_key_prefix
tmp = tempfile.mkdtemp()

session.resource('s3').Object(bucket, key).upload_file(tar_file)
finally:
shutil.rmtree(tmp)
try:
source_files = _list_files_to_compress(script, directory) + dependencies
tar_file = sagemaker.utils.create_tar_file(source_files,
os.path.join(tmp, _TAR_SOURCE_FILENAME))

script_name = script if directory else os.path.basename(script)
return UploadedCode(s3_prefix='s3://%s/%s' % (bucket, key), script_name=script_name)
session.resource('s3').Object(bucket, key).upload_file(tar_file)
finally:
shutil.rmtree(tmp)

return UploadedCode(s3_prefix='s3://%s/%s' % (bucket, key), script_name=script_name)


def _list_files_to_compress(script, directory):
if directory is None:
return [script]

basedir = directory if directory else os.path.dirname(script)
return [os.path.join(basedir, name) for name in os.listdir(basedir)]

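To make the new behaviour concrete, here is a minimal usage sketch of tar_and_upload_dir (not part of the diff; bucket, prefix, and file paths are placeholders, and the boto session is assumed to have S3 access):

import boto3

from sagemaker import fw_utils

session = boto3.Session()  # placeholder; any boto session that can reach S3

# entry_point given without a source_dir: only the script itself is archived.
uploaded = fw_utils.tar_and_upload_dir(session, 'my-bucket', 'my/prefix',
                                       script='/home/me/src/train.py', directory=None)
print(uploaded.s3_prefix)    # s3://my-bucket/my/prefix/sourcedir.tar.gz
print(uploaded.script_name)  # train.py (basename of the script)

# With a local source_dir, the whole directory is archived and script is a
# relative path inside it.
uploaded = fw_utils.tar_and_upload_dir(session, 'my-bucket', 'my/prefix',
                                       script='train.py', directory='/home/me/src')

# If directory is already an S3 URI, nothing is uploaded and the existing
# location is reused as-is.
uploaded = fw_utils.tar_and_upload_dir(session, 'my-bucket', 'my/prefix',
                                       script='train.py',
                                       directory='s3://my-bucket/code/sourcedir.tar.gz')
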
38 changes: 37 additions & 1 deletion tests/unit/test_fw_utils.py
@@ -19,6 +19,7 @@
import pytest
from mock import Mock, patch

from contextlib import contextmanager
from sagemaker import fw_utils
from sagemaker.utils import name_from_image

@@ -30,6 +31,14 @@
TIMESTAMP = '2017-10-10-14-14-15'


@contextmanager
def cd(path):
old_dir = os.getcwd()
os.chdir(path)
yield
os.chdir(old_dir)


@pytest.fixture()
def sagemaker_session():
boto_mock = Mock(name='boto_session', region_name=REGION)
@@ -132,7 +141,7 @@ def test_validate_source_dir_file_not_in_dir():


def test_tar_and_upload_dir_not_s3(sagemaker_session):
bucket = 'mybucker'
bucket = 'mybucket'
s3_key_prefix = 'something/source'
script = os.path.basename(__file__)
directory = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
@@ -166,6 +175,33 @@ def test_tar_and_upload_dir_no_directory(sagemaker_session, tmpdir):
assert {'/train.py'} == list_source_dir_files(sagemaker_session, tmpdir)


def test_tar_and_upload_dir_no_directory_only_entrypoint(sagemaker_session, tmpdir):
source_dir = file_tree(tmpdir, ['train.py', 'not_me.py'])
entrypoint = os.path.join(source_dir, 'train.py')

with patch('shutil.rmtree'):
result = fw_utils.tar_and_upload_dir(sagemaker_session, 'bucket', 'prefix', entrypoint, None)

assert result == fw_utils.UploadedCode(s3_prefix='s3://bucket/prefix/sourcedir.tar.gz',
script_name='train.py')

assert {'/train.py'} == list_source_dir_files(sagemaker_session, tmpdir)


def test_tar_and_upload_dir_no_directory_bare_filename(sagemaker_session, tmpdir):
source_dir = file_tree(tmpdir, ['train.py'])
entrypoint = 'train.py'

with patch('shutil.rmtree'):
with cd(source_dir):
result = fw_utils.tar_and_upload_dir(sagemaker_session, 'bucket', 'prefix', entrypoint, None)

assert result == fw_utils.UploadedCode(s3_prefix='s3://bucket/prefix/sourcedir.tar.gz',
script_name='train.py')

assert {'/train.py'} == list_source_dir_files(sagemaker_session, tmpdir)


def test_tar_and_upload_dir_with_directory(sagemaker_session, tmpdir):
file_tree(tmpdir, ['src-dir/train.py'])
source_dir = os.path.join(str(tmpdir), 'src-dir')