-
Notifications
You must be signed in to change notification settings - Fork 1.2k
Replaced generic ValueError with custom subclass when reporting unexpected resource status #855
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"). You | ||
# may not use this file except in compliance with the License. A copy of | ||
# the License is located at | ||
# | ||
# http://aws.amazon.com/apache2.0/ | ||
# | ||
# or in the "license" file accompanying this file. This file is | ||
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF | ||
# ANY KIND, either express or implied. See the License for the specific | ||
# language governing permissions and limitations under the License. | ||
from __future__ import absolute_import | ||
|
||
|
||
class UnexpectedStatusException(ValueError):
    """Raised when resource status is not expected and thus not allowed for further execution.

    This is a ``ValueError`` subclass, so ``except ValueError`` handlers still
    catch it.

    Args:
        message (str): Human-readable description of the failure.
        allowed_statuses (list[str]): Statuses that would have been accepted.
        actual_status (str): The status that was actually observed.
    """

    def __init__(self, message, allowed_statuses, actual_status):
        self.allowed_statuses = allowed_statuses
        self.actual_status = actual_status
        super(UnexpectedStatusException, self).__init__(message)

    def __reduce__(self):
        """Return the constructor call needed to rebuild this exception.

        The default exception reduction re-invokes the class with ``self.args``,
        which holds only ``message``. That call would fail because ``__init__``
        requires three arguments, so pickling and ``copy`` would break.
        """
        message = self.args[0] if self.args else None
        return (self.__class__, (message, self.allowed_statuses, self.actual_status))
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -34,6 +34,7 @@ | |
secondary_training_status_changed, | ||
secondary_training_status_message, | ||
) | ||
from sagemaker import exceptions | ||
|
||
LOGGER = logging.getLogger("sagemaker") | ||
|
||
|
@@ -793,10 +794,10 @@ def wait_for_model_package(self, model_package_name, poll=5): | |
|
||
if status != "Completed": | ||
reason = desc.get("FailureReason", None) | ||
raise ValueError( | ||
"Error creating model package {}: {} Reason: {}".format( | ||
model_package_name, status, reason | ||
) | ||
raise exceptions.UnexpectedStatusException( | ||
message="Error creating model package {}: {} Reason: {}".format(model_package_name, status, reason), | ||
allowed_statuses=["Completed"], | ||
actual_status=status | ||
) | ||
return desc | ||
|
||
|
@@ -948,7 +949,7 @@ def wait_for_job(self, job, poll=5): | |
(dict): Return value from the ``DescribeTrainingJob`` API. | ||
|
||
Raises: | ||
ValueError: If the training job fails. | ||
exceptions.UnexpectedStatusException: If the training job fails. | ||
""" | ||
desc = _wait_until_training_done( | ||
lambda last_desc: _train_done(self.sagemaker_client, job, last_desc), None, poll | ||
|
@@ -967,7 +968,7 @@ def wait_for_compilation_job(self, job, poll=5): | |
(dict): Return value from the ``DescribeCompilationJob`` API. | ||
|
||
Raises: | ||
ValueError: If the compilation job fails. | ||
exceptions.UnexpectedStatusException: If the compilation job fails. | ||
""" | ||
desc = _wait_until(lambda: _compilation_job_status(self.sagemaker_client, job), poll) | ||
self._check_job_status(job, desc, "CompilationJobStatus") | ||
|
@@ -984,7 +985,7 @@ def wait_for_tuning_job(self, job, poll=5): | |
(dict): Return value from the ``DescribeHyperParameterTuningJob`` API. | ||
|
||
Raises: | ||
ValueError: If the hyperparameter tuning job fails. | ||
exceptions.UnexpectedStatusException: If the hyperparameter tuning job fails. | ||
""" | ||
desc = _wait_until(lambda: _tuning_job_status(self.sagemaker_client, job), poll) | ||
self._check_job_status(job, desc, "HyperParameterTuningJobStatus") | ||
|
@@ -1001,23 +1002,23 @@ def wait_for_transform_job(self, job, poll=5): | |
(dict): Return value from the ``DescribeTransformJob`` API. | ||
|
||
Raises: | ||
ValueError: If the transform job fails. | ||
exceptions.UnexpectedStatusException: If the transform job fails. | ||
""" | ||
desc = _wait_until(lambda: _transform_job_status(self.sagemaker_client, job), poll) | ||
self._check_job_status(job, desc, "TransformJobStatus") | ||
return desc | ||
|
||
def _check_job_status(self, job, desc, status_key_name): | ||
"""Check to see if the job completed successfully and, if not, construct and | ||
raise a ValueError. | ||
raise an exceptions.UnexpectedStatusException. | ||
|
||
Args: | ||
job (str): The name of the job to check. | ||
desc (dict[str, str]): The result of ``describe_training_job()``. | ||
status_key_name (str): Status key name to check for. | ||
|
||
Raises: | ||
ValueError: If the training job fails. | ||
exceptions.UnexpectedStatusException: If the training job fails. | ||
""" | ||
status = desc[status_key_name] | ||
# If the status is capital case, then convert it to Camel case | ||
|
@@ -1026,7 +1027,11 @@ def _check_job_status(self, job, desc, status_key_name): | |
if status != "Completed" and status != "Stopped": | ||
reason = desc.get("FailureReason", "(No reason provided)") | ||
job_type = status_key_name.replace("JobStatus", " job") | ||
raise ValueError("Error for {} {}: {} Reason: {}".format(job_type, job, status, reason)) | ||
raise exceptions.UnexpectedStatusException( | ||
message="Error for {} {}: {} Reason: {}".format(job_type, job, status, reason), | ||
allowed_statuses=["Completed", "Stopped"], | ||
actual_status=status | ||
) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We need unit tests here as well. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Done. |
||
|
||
def wait_for_endpoint(self, endpoint, poll=5): | ||
"""Wait for an Amazon SageMaker endpoint deployment to complete. | ||
|
@@ -1043,8 +1048,10 @@ def wait_for_endpoint(self, endpoint, poll=5): | |
|
||
if status != "InService": | ||
reason = desc.get("FailureReason", None) | ||
raise ValueError( | ||
"Error hosting endpoint {}: {} Reason: {}".format(endpoint, status, reason) | ||
raise exceptions.UnexpectedStatusException( | ||
message="Error hosting endpoint {}: {} Reason: {}".format(endpoint, status, reason), | ||
allowed_statuses=["InService"], | ||
actual_status=status | ||
) | ||
return desc | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Please include unit tests. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Done. |
||
|
||
|
@@ -1278,7 +1285,7 @@ def logs_for_job( # noqa: C901 - suppress complexity warning for this method | |
poll (int): The interval in seconds between polling for new log entries and job completion (default: 5). | ||
|
||
Raises: | ||
ValueError: If waiting and the training job fails. | ||
exceptions.UnexpectedStatusException: If waiting and the training job fails. | ||
""" | ||
|
||
description = self.sagemaker_client.describe_training_job(TrainingJobName=job_name) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
# Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"). You | ||
# may not use this file except in compliance with the License. A copy of | ||
# the License is located at | ||
# | ||
# http://aws.amazon.com/apache2.0/ | ||
# | ||
# or in the "license" file accompanying this file. This file is | ||
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF | ||
# ANY KIND, either express or implied. See the License for the specific | ||
# language governing permissions and limitations under the License. | ||
from __future__ import absolute_import | ||
|
||
import pytest | ||
from mock import Mock, MagicMock | ||
import sagemaker | ||
|
||
EXPANDED_ROLE = 'arn:aws:iam::111111111111:role/ExpandedRole' | ||
REGION = 'us-west-2' | ||
MODEL_PACKAGE_NAME = 'my_model_package' | ||
JOB_NAME = 'my_job_name' | ||
ENDPOINT_NAME = 'the_point_of_end' | ||
|
||
|
||
def get_sagemaker_session(returns_status):
    """Build a ``sagemaker.Session`` wired to mocks for status checks.

    The mocked client's ``describe_model_package`` and ``describe_endpoint``
    calls both report ``returns_status``.

    Args:
        returns_status (str): Status value the mocked describe calls return.

    Returns:
        sagemaker.Session: Session using a mocked boto session and client,
        with ``expand_role`` stubbed to return ``EXPANDED_ROLE``.
    """
    sagemaker_client = Mock()
    sagemaker_client.describe_model_package = MagicMock(
        return_value={'ModelPackageStatus': returns_status}
    )
    sagemaker_client.describe_endpoint = MagicMock(
        return_value={'EndpointStatus': returns_status}
    )

    session = sagemaker.Session(
        boto_session=Mock(name='boto_session', region_name=REGION),
        sagemaker_client=sagemaker_client,
    )
    session.expand_role = Mock(return_value=EXPANDED_ROLE)
    return session
|
||
|
||
def test_does_not_raise_when_successfully_created_package():
    """A 'Completed' model package status must not raise UnexpectedStatusException."""
    session = get_sagemaker_session(returns_status='Completed')
    try:
        session.wait_for_model_package(MODEL_PACKAGE_NAME)
    except sagemaker.exceptions.UnexpectedStatusException:
        pytest.fail("UnexpectedStatusException was thrown while it should not")
|
||
|
||
def test_raise_when_failed_created_package():
    """A model package status other than 'Completed' must raise UnexpectedStatusException.

    Uses ``pytest.raises`` so that a missing exception is reported directly,
    rather than as a sentinel ``AssertionError`` caught by a broad ``except``.
    """
    sagemaker_session = get_sagemaker_session(returns_status='EnRoute')

    with pytest.raises(sagemaker.exceptions.UnexpectedStatusException) as excinfo:
        sagemaker_session.wait_for_model_package(MODEL_PACKAGE_NAME)

    error = excinfo.value
    assert error.actual_status == 'EnRoute'
    assert 'Completed' in error.allowed_statuses
|
||
|
||
def test_does_not_raise_when_correct_job_status():
    """A 'Stopped' job status must not raise UnexpectedStatusException."""
    status_key = 'TransformationJobStatus'
    description = {status_key: 'Stopped'}
    job = Mock()
    session = get_sagemaker_session(returns_status='Stopped')
    try:
        session._check_job_status(job, description, status_key)
    except sagemaker.exceptions.UnexpectedStatusException:
        pytest.fail("UnexpectedStatusException was thrown while it should not")
|
||
|
||
def test_does_raise_when_incorrect_job_status():
    """A 'Failed' job status must raise UnexpectedStatusException.

    The exception is expected to report the observed status and to list both
    'Completed' and 'Stopped' as allowed statuses.
    """
    job = Mock()
    sagemaker_session = get_sagemaker_session(returns_status='Failed')

    with pytest.raises(sagemaker.exceptions.UnexpectedStatusException) as excinfo:
        sagemaker_session._check_job_status(
            job, {'TransformationJobStatus': 'Failed'}, 'TransformationJobStatus'
        )

    error = excinfo.value
    assert error.actual_status == 'Failed'
    assert 'Completed' in error.allowed_statuses
    assert 'Stopped' in error.allowed_statuses
|
||
|
||
def test_does_not_raise_when_successfully_deployed_endpoint():
    """An 'InService' endpoint status must not raise UnexpectedStatusException."""
    session = get_sagemaker_session(returns_status='InService')
    try:
        session.wait_for_endpoint(ENDPOINT_NAME)
    except sagemaker.exceptions.UnexpectedStatusException:
        pytest.fail("UnexpectedStatusException was thrown while it should not")
|
||
|
||
def test_raise_when_failed_to_deploy_endpoint():
    """An endpoint status other than 'InService' must raise UnexpectedStatusException.

    The call under test is expected to raise, so its return value is not
    asserted on; ``pytest.raises`` reports a missing exception directly.
    """
    sagemaker_session = get_sagemaker_session(returns_status='Failed')

    with pytest.raises(sagemaker.exceptions.UnexpectedStatusException) as excinfo:
        sagemaker_session.wait_for_endpoint(ENDPOINT_NAME)

    error = excinfo.value
    assert error.actual_status == 'Failed'
    assert 'InService' in error.allowed_statuses
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please add copyright.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
added