Skip to content

Commit 94aa0cd

Browse files
Shift approach to requiring/validating name sourcedir
1 parent d2a0f46 commit 94aa0cd

File tree

1 file changed

+13
-6
lines changed

1 file changed

+13
-6
lines changed

src/sagemaker/processing.py

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1587,13 +1587,13 @@ def run( # type: ignore[override]
15871587
framework script to run. Path (absolute or relative) to the local
15881588
Python source file which should be executed as the entry point
15891589
to training. When `code` is an S3 URI, ignore `source_dir`,
1590-
`dependencies, and `git_config`. If ``source_dir`` is specified,
1590+
`dependencies`, and `git_config`. If ``source_dir`` is specified,
15911591
then ``code`` must point to a file located at the root of ``source_dir``.
15921592
source_dir (str): Path (absolute, relative or an S3 URI) to a directory
15931593
with any other processing source code dependencies aside from the entry
15941594
point file (default: None). If ``source_dir`` is an S3 URI, it must
1595-
point to a tar.gz file. Structure within this directory are preserved
1596-
when processing on Amazon SageMaker (default: None).
1595+
point to a file named `sourcedir.tar.gz`. Structure within this directory
1596+
is preserved when processing on Amazon SageMaker (default: None).
15971597
dependencies (list[str]): A list of paths to directories (absolute
15981598
or relative) with any additional libraries that will be exported
15991599
to the container (default: []). The library folders will be
@@ -1730,9 +1730,16 @@ def _pack_and_upload_code(
17301730
"sagemaker_session unspecified when creating your Processor to have one set up "
17311731
"automatically."
17321732
)
1733-
1734-
# Upload the bootstrapping code as s3://.../jobname/source/runproc.sh.
1735-
entrypoint_s3_uri = estimator.uploaded_code.s3_prefix.rsplit("/", 1)[0] + "/runproc.sh"
1733+
if ("sourcedir.tar.gz" in estimator.uploaded_code.s3_prefix):
1734+
# Upload the bootstrapping code as s3://.../jobname/source/runproc.sh.
1735+
entrypoint_s3_uri = estimator.uploaded_code.s3_prefix.replace(
1736+
"sourcedir.tar.gz",
1737+
"runproc.sh",
1738+
)
1739+
else:
1740+
raise RuntimeError(
1741+
"S3 source_dir file must be named `sourcedir.tar.gz.`"
1742+
)
17361743

17371744
script = estimator.uploaded_code.script_name
17381745
s3_runproc_sh = S3Uploader.upload_string_as_file_body(

0 commit comments

Comments
 (0)