Skip to content

Commit dcca7b7

Browse files
authored
fix: local mode deletion of temp files on job end (#3017)
1 parent 2697b21 commit dcca7b7

File tree

2 files changed

+35
-1
lines changed

2 files changed

+35
-1
lines changed

src/sagemaker/local/utils.py

+17-1
Original file line numberDiff line numberDiff line change
@@ -14,16 +14,20 @@
1414
from __future__ import absolute_import
1515

1616
import os
17+
import logging
1718
import shutil
1819
import subprocess
1920
import json
2021
import re
22+
import errno
2123

2224
from distutils.dir_util import copy_tree
2325
from six.moves.urllib.parse import urlparse
2426

2527
from sagemaker import s3
2628

29+
logger = logging.getLogger(__name__)
30+
2731

2832
def copy_directory_structure(destination_directory, relative_path):
2933
"""Creates intermediate directory structure for relative_path.
@@ -77,7 +81,19 @@ def move_to_destination(source, destination, job_name, sagemaker_session):
7781
else:
7882
raise ValueError("Invalid destination URI, must be s3:// or file://, got: %s" % destination)
7983

80-
shutil.rmtree(source)
84+
try:
85+
shutil.rmtree(source)
86+
except OSError as exc:
87+
# on Linux, when docker writes to any mounted volume, it uses the container's user. In most
88+
# cases this is root. When the container exits and we try to delete them we can't because
89+
# root owns those files. We expect this to happen, so we handle EACCESS. Any other error
90+
# we will raise the exception up.
91+
if exc.errno == errno.EACCES:
92+
logger.warning("Failed to delete: %s Please remove it manually.", source)
93+
else:
94+
logger.error("Failed to delete: %s", source)
95+
raise
96+
8197
return final_uri
8298

8399

tests/unit/sagemaker/local/test_local_utils.py

+18
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from __future__ import absolute_import
1414

1515
import os
16+
import errno
1617
import pytest
1718
from mock import patch, Mock
1819

@@ -168,3 +169,20 @@ def test_get_using_dot_notation_key_error():
168169
def test_get_using_dot_notation_index_error():
169170
with pytest.raises(ValueError):
170171
sagemaker.local.utils.get_using_dot_notation({"foo": ["bar"]}, "foo[1]")
172+
173+
174+
def raise_os_error(args):
175+
err = OSError()
176+
err.errno = errno.EACCES
177+
raise err
178+
179+
180+
@patch("shutil.rmtree", side_effect=raise_os_error)
181+
@patch("sagemaker.local.utils.recursive_copy")
182+
def test_move_to_destination_local_root_failure(recursive_copy, mock_rmtree):
183+
# This should not raise, in case root owns files, make sure it doesn't
184+
sagemaker.local.utils.move_to_destination("/tmp/data", "file:///target/dir/", "job", None)
185+
mock_rmtree.assert_called_once()
186+
recursive_copy.assert_called_with(
187+
"/tmp/data", os.path.abspath(os.path.join(os.sep, "target", "dir"))
188+
)

0 commit comments

Comments
 (0)