Skip to content

Commit c554fb1

Browse files
committed
Throttle the describe_training_job call to at most once every 30 seconds
1 parent 6a6d4c6 commit c554fb1

File tree

1 file changed

+5
-1
lines changed

1 file changed

+5
-1
lines changed

src/sagemaker/session.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -605,7 +605,7 @@ def logs_for_job(self, job_name, wait=False, poll=5): # noqa: C901 - suppress c
605605
# Notes:
606606
# - The JOB_COMPLETE state forces us to do an extra pause and read any items that got to CloudWatch after
607607
# the job was marked complete.
608-
608+
seconds_since_last_describe_job_call = 30
609609
while True:
610610
if len(stream_names) < instance_count:
611611
# Log streams are created whenever a container starts writing to stdout/err, so this list
@@ -645,8 +645,12 @@ def logs_for_job(self, job_name, wait=False, poll=5): # noqa: C901 - suppress c
645645

646646
if state == LogState.JOB_COMPLETE:
647647
state = LogState.COMPLETE
648+
elif seconds_since_last_describe_job_call < 30:
649+
seconds_since_last_describe_job_call += max(poll, 1)
648650
else:
649651
description = self.sagemaker_client.describe_training_job(TrainingJobName=job_name)
652+
seconds_since_last_describe_job_call = 0
653+
650654
status = description['TrainingJobStatus']
651655

652656
if status == 'Completed' or status == 'Failed':

0 commit comments

Comments
 (0)