|
41 | 41 | secondary_training_status_changed,
|
42 | 42 | secondary_training_status_message,
|
43 | 43 | sts_regional_endpoint,
|
| 44 | + retries, |
44 | 45 | )
|
45 | 46 | from sagemaker import exceptions
|
46 | 47 | from sagemaker.session_settings import SessionSettings
|
@@ -4691,21 +4692,30 @@ def _train_done(sagemaker_client, job_name, last_desc):
|
4691 | 4692 | """Placeholder docstring"""
|
4692 | 4693 | in_progress_statuses = ["InProgress", "Created"]
|
4693 | 4694 |
|
4694 |
| - desc = sagemaker_client.describe_training_job(TrainingJobName=job_name) |
4695 |
| - status = desc["TrainingJobStatus"] |
| 4695 | + for _ in retries( |
| 4696 | + max_retry_count=10, # 10*30 = 5min |
| 4697 | + exception_message_prefix="Waiting for schedule to leave 'Pending' status", |
| 4698 | + seconds_to_sleep=30, |
| 4699 | + ): |
| 4700 | + try: |
| 4701 | + desc = sagemaker_client.describe_training_job(TrainingJobName=job_name) |
| 4702 | + status = desc["TrainingJobStatus"] |
4696 | 4703 |
|
4697 |
| - if secondary_training_status_changed(desc, last_desc): |
4698 |
| - print() |
4699 |
| - print(secondary_training_status_message(desc, last_desc), end="") |
4700 |
| - else: |
4701 |
| - print(".", end="") |
4702 |
| - sys.stdout.flush() |
| 4704 | + if secondary_training_status_changed(desc, last_desc): |
| 4705 | + print() |
| 4706 | + print(secondary_training_status_message(desc, last_desc), end="") |
| 4707 | + else: |
| 4708 | + print(".", end="") |
| 4709 | + sys.stdout.flush() |
4703 | 4710 |
|
4704 |
| - if status in in_progress_statuses: |
4705 |
| - return desc, False |
| 4711 | + if status in in_progress_statuses: |
| 4712 | + return desc, False |
4706 | 4713 |
|
4707 |
| - print() |
4708 |
| - return desc, True |
| 4714 | + print() |
| 4715 | + return desc, True |
| 4716 | + except botocore.exceptions.ClientError as err: |
| 4717 | + if err.response["Error"]["Code"] == "AccessDeniedException": |
| 4718 | + pass |
4709 | 4719 |
|
4710 | 4720 |
|
4711 | 4721 | def _processing_job_status(sagemaker_client, job_name):
|
|
0 commit comments