Skip to content

Commit 1bef225

Browse files
author
Jaya Kasiraj
committed
fix: remove historical job_name caching which causes job names to exceed 63 char length
1 parent 7fc9868 commit 1bef225

File tree

1 file changed

+0
-43
lines changed

1 file changed

+0
-43
lines changed

src/sagemaker/workflow/steps.py

Lines changed: 0 additions & 43 deletions
Original file line number | Diff line number | Diff line change
@@ -493,19 +493,6 @@ def __init__(
493493
DeprecationWarning,
494494
)
495495

496-
self.job_name = None
497-
if estimator and (estimator.source_dir or estimator.entry_point):
498-
# By default, `Estimator` will upload the local code to an S3 path
499-
# containing a timestamp. This causes cache misses whenever a
500-
# pipeline is updated, even if the underlying script hasn't changed.
501-
# To avoid this, hash the contents of the training script and include it
502-
# in the `job_name` passed to the `Estimator`, which will be used
503-
# instead of the timestamped path.
504-
if not is_pipeline_variable(estimator.source_dir) and not is_pipeline_variable(
505-
estimator.entry_point
506-
):
507-
self.job_name = self._generate_code_upload_path()
508-
509496
@property
510497
def arguments(self) -> RequestType:
511498
"""The arguments dictionary that is used to call `create_training_job`.
@@ -554,26 +541,6 @@ def to_request(self) -> RequestType:
554541

555542
return request_dict
556543

557-
def _generate_code_upload_path(self) -> str or None:
558-
"""Generate an upload path for local training scripts based on their content."""
559-
from sagemaker.workflow.utilities import hash_files_or_dirs
560-
561-
if self.estimator.source_dir:
562-
source_dir_url = urlparse(self.estimator.source_dir)
563-
if source_dir_url.scheme == "" or source_dir_url.scheme == "file":
564-
code_hash = hash_files_or_dirs(
565-
[self.estimator.source_dir] + self.estimator.dependencies
566-
)
567-
return f"{self.name}-{code_hash}"[:1024]
568-
elif self.estimator.entry_point:
569-
entry_point_url = urlparse(self.estimator.entry_point)
570-
if entry_point_url.scheme == "" or entry_point_url.scheme == "file":
571-
code_hash = hash_files_or_dirs(
572-
[self.estimator.entry_point] + self.estimator.dependencies
573-
)
574-
return f"{self.name}-{code_hash}"[:1024]
575-
return None
576-
577544

578545
class CreateModelStep(ConfigurableRetryStep):
579546
"""`CreateModelStep` for SageMaker Pipelines Workflows."""
@@ -895,16 +862,6 @@ def __init__(
895862
"code argument has to be a valid S3 URI or local file path "
896863
+ "rather than a pipeline variable"
897864
)
898-
code_url = urlparse(code)
899-
if code_url.scheme == "" or code_url.scheme == "file":
900-
# By default, `Processor` will upload the local code to an S3 path
901-
# containing a timestamp. This causes cache misses whenever a
902-
# pipeline is updated, even if the underlying script hasn't changed.
903-
# To avoid this, hash the contents of the script and include it
904-
# in the `job_name` passed to the `Processor`, which will be used
905-
# instead of the timestamped path.
906-
self.job_name = self._generate_code_upload_path()
907-
908865
warnings.warn(
909866
(
910867
'We are deprecating the instantiation of ProcessingStep using "processor".'

0 commit comments

Comments (0)