Skip to content

fix: Prevent repack_model script from referencing nonexistent directories #2755

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Dec 9, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 15 additions & 12 deletions src/sagemaker/workflow/_repack_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,15 +62,15 @@ def repack(inference_script, model_archive, dependencies=None, source_dir=None):
with tarfile.open(name=local_path, mode="r:gz") as tf:
tf.extractall(path=src_dir)

# copy the custom inference script to code/
entry_point = os.path.join("/opt/ml/code", inference_script)
shutil.copy2(entry_point, os.path.join(src_dir, "code", inference_script))

# copy source_dir to code/
if source_dir:
# copy /opt/ml/code to code/
if os.path.exists(code_dir):
shutil.rmtree(code_dir)
shutil.copytree(source_dir, code_dir)
shutil.copytree("/opt/ml/code", code_dir)
else:
# copy the custom inference script to code/
entry_point = os.path.join("/opt/ml/code", inference_script)
shutil.copy2(entry_point, os.path.join(code_dir, inference_script))

# copy any dependencies to code/lib/
if dependencies:
Expand All @@ -79,13 +79,16 @@ def repack(inference_script, model_archive, dependencies=None, source_dir=None):
lib_dir = os.path.join(code_dir, "lib")
if not os.path.exists(lib_dir):
os.mkdir(lib_dir)
if os.path.isdir(actual_dependency_path):
shutil.copytree(
actual_dependency_path,
os.path.join(lib_dir, os.path.basename(actual_dependency_path)),
)
else:
if os.path.isfile(actual_dependency_path):
shutil.copy2(actual_dependency_path, lib_dir)
else:
if os.path.exists(lib_dir):
shutil.rmtree(lib_dir)
# A directory is in the dependencies. We have to copy
# all of /opt/ml/code into the lib dir because the original directory
# was flattened by the SDK training job upload.
shutil.copytree("/opt/ml/code", lib_dir)
break

# copy the "src" dir, which includes the previous training job's model and the
# custom inference script, to the output of this training job
Expand Down
4 changes: 2 additions & 2 deletions tests/unit/sagemaker/workflow/test_repack_model_script.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ def test_repack_with_dependencies(tmp):
_repack_model.repack(
inference_script="inference.py",
model_archive=model_tar_name,
dependencies=["dependencies/a", "bb", "dependencies/some/dir"],
dependencies="dependencies/a bb dependencies/some/dir",
)

# /opt/ml/model should now have the original model and the inference script
Expand Down Expand Up @@ -145,7 +145,7 @@ def test_repack_with_source_dir_and_dependencies(tmp):
_repack_model.repack(
inference_script="inference.py",
model_archive=model_tar_name,
dependencies=["dependencies/a", "bb", "dependencies/some/dir"],
dependencies="dependencies/a bb dependencies/some/dir",
source_dir="sourcedir",
)

Expand Down