Skip to content

Commit aa5d3b2

Browse files
jmahliknavinns
authored andcommitted
fix: local mode deletion of temp files on job end
1 parent 11ff7c0 commit aa5d3b2

File tree

2 files changed

+35
-1
lines changed

2 files changed

+35
-1
lines changed

src/sagemaker/local/utils.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,16 +14,20 @@
1414
from __future__ import absolute_import
1515

1616
import os
17+
import logging
1718
import shutil
1819
import subprocess
1920
import json
2021
import re
22+
import errno
2123

2224
from distutils.dir_util import copy_tree
2325
from six.moves.urllib.parse import urlparse
2426

2527
from sagemaker import s3
2628

29+
logger = logging.getLogger(__name__)
30+
2731

2832
def copy_directory_structure(destination_directory, relative_path):
2933
"""Creates intermediate directory structure for relative_path.
@@ -77,7 +81,19 @@ def move_to_destination(source, destination, job_name, sagemaker_session):
7781
else:
7882
raise ValueError("Invalid destination URI, must be s3:// or file://, got: %s" % destination)
7983

80-
shutil.rmtree(source)
84+
try:
85+
shutil.rmtree(source)
86+
except OSError as exc:
87+
# on Linux, when docker writes to any mounted volume, it uses the container's user. In most
88+
# cases this is root. When the container exits and we try to delete them we can't because
89+
# root owns those files. We expect this to happen, so we handle EACCESS. Any other error
90+
# we will raise the exception up.
91+
if exc.errno == errno.EACCES:
92+
logger.warning("Failed to delete: %s Please remove it manually.", source)
93+
else:
94+
logger.error("Failed to delete: %s", source)
95+
raise
96+
8197
return final_uri
8298

8399

tests/unit/sagemaker/local/test_local_utils.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from __future__ import absolute_import
1414

1515
import os
16+
import errno
1617
import pytest
1718
from mock import patch, Mock
1819

@@ -165,3 +166,20 @@ def test_get_using_dot_notation_key_error():
165166
def test_get_using_dot_notation_index_error():
166167
with pytest.raises(ValueError):
167168
sagemaker.local.utils.get_using_dot_notation({"foo": ["bar"]}, "foo[1]")
169+
170+
171+
def raise_os_error(args):
172+
err = OSError()
173+
err.errno = errno.EACCES
174+
raise err
175+
176+
177+
@patch("shutil.rmtree", side_effect=raise_os_error)
178+
@patch("sagemaker.local.utils.recursive_copy")
179+
def test_move_to_destination_local_root_failure(recursive_copy, mock_rmtree):
180+
# This should not raise, in case root owns files, make sure it doesn't
181+
sagemaker.local.utils.move_to_destination("/tmp/data", "file:///target/dir/", "job", None)
182+
mock_rmtree.assert_called_once()
183+
recursive_copy.assert_called_with(
184+
"/tmp/data", os.path.abspath(os.path.join(os.sep, "target", "dir"))
185+
)

0 commit comments

Comments
 (0)