Skip to content

fix: local mode deletion of temp files on job end #3017

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 17 additions & 1 deletion src/sagemaker/local/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,20 @@
from __future__ import absolute_import

import os
import logging
import shutil
import subprocess
import json
import re
import errno

from distutils.dir_util import copy_tree
from six.moves.urllib.parse import urlparse

from sagemaker import s3

logger = logging.getLogger(__name__)


def copy_directory_structure(destination_directory, relative_path):
"""Creates intermediate directory structure for relative_path.
Expand Down Expand Up @@ -77,7 +81,19 @@ def move_to_destination(source, destination, job_name, sagemaker_session):
else:
raise ValueError("Invalid destination URI, must be s3:// or file://, got: %s" % destination)

shutil.rmtree(source)
try:
shutil.rmtree(source)
except OSError as exc:
# on Linux, when docker writes to any mounted volume, it uses the container's user. In most
# cases this is root. When the container exits and we try to delete them we can't because
# root owns those files. We expect this to happen, so we handle EACCES. Any other error
# we will raise the exception up.
if exc.errno == errno.EACCES:
logger.warning("Failed to delete: %s Please remove it manually.", source)
else:
logger.error("Failed to delete: %s", source)
raise

return final_uri


Expand Down
18 changes: 18 additions & 0 deletions tests/unit/sagemaker/local/test_local_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from __future__ import absolute_import

import os
import errno
import pytest
from mock import patch, Mock

Expand Down Expand Up @@ -165,3 +166,20 @@ def test_get_using_dot_notation_key_error():
def test_get_using_dot_notation_index_error():
    """An out-of-range list index in the dotted path must surface as ValueError."""
    single_item_config = {"foo": ["bar"]}
    out_of_range_path = "foo[1]"  # the list under "foo" only has index 0

    with pytest.raises(ValueError):
        sagemaker.local.utils.get_using_dot_notation(single_item_config, out_of_range_path)


def raise_os_error(*args, **kwargs):
    """Mock side effect that always fails with a permission-denied ``OSError``.

    Any positional or keyword arguments are accepted and ignored, so the helper
    can stand in for a patched callable regardless of how that callable is
    invoked.

    Raises:
        OSError: always, with ``errno`` set to ``errno.EACCES``.
    """
    # Passing (errno, strerror) to the constructor populates both attributes the
    # way a failed system call would, instead of bolting errno onto an empty
    # exception after the fact.
    raise OSError(errno.EACCES, os.strerror(errno.EACCES))


@patch("shutil.rmtree", side_effect=raise_os_error)
@patch("sagemaker.local.utils.recursive_copy")
def test_move_to_destination_local_root_failure(recursive_copy, mock_rmtree):
    """move_to_destination must not raise when cleanup fails with EACCES.

    ``shutil.rmtree`` is patched to fail through ``raise_os_error`` (errno
    EACCES), and ``recursive_copy`` is patched out so only the call arguments
    are inspected.
    """
    source_dir = "/tmp/data"

    # This should not raise, in case root owns files, make sure it doesn't
    sagemaker.local.utils.move_to_destination(source_dir, "file:///target/dir/", "job", None)

    # Check the deletion was attempted exactly once and on the source directory,
    # not merely that rmtree was called with something.
    mock_rmtree.assert_called_once_with(source_dir)
    recursive_copy.assert_called_with(
        source_dir, os.path.abspath(os.path.join(os.sep, "target", "dir"))
    )