Skip to content

change: fix integration test failures masked by timeout bug #977

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits on
Aug 13, 2019
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 2 additions & 9 deletions tests/integ/test_ipinsights.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,6 @@

import os

import pytest

from sagemaker import IPInsights, IPInsightsModel
from sagemaker.predictor import RealTimePredictor
from sagemaker.utils import unique_name_from_base
Expand All @@ -26,10 +24,6 @@
FEATURE_DIM = None


@pytest.mark.skip(
reason="This test has always failed, but the failure was masked by a bug. "
"This test should be fixed. Details in https://github.com/aws/sagemaker-python-sdk/pull/968"
)
def test_ipinsights(sagemaker_session):
job_name = unique_name_from_base("ipinsights")

Expand Down Expand Up @@ -64,6 +58,5 @@ def test_ipinsights(sagemaker_session):
predict_input = [["user_1", "1.1.1.1"]]
result = predictor.predict(predict_input)

assert len(result) == 1
for record in result:
assert record.label["dot_product"] is not None
assert len(result["predictions"]) == 1
assert 0 > result["predictions"][0]["dot_product"] > -1 # We expect ~ -0.22
2 changes: 1 addition & 1 deletion tests/integ/test_marketplace.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
# Pre-Trained Model: Scikit Decision Trees - Pretrained Model
# https://aws.amazon.com/marketplace/pp/prodview-7qop4x5ahrdhe
#
# Both are written by Amazon and are free to subscribe.
# Both are written by Amazon and are free to subscribe.

ALGORITHM_ARN = (
"arn:aws:sagemaker:%s:%s:algorithm/scikit-decision-trees-"
Expand Down
63 changes: 34 additions & 29 deletions tests/integ/test_mxnet_train.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,10 +96,6 @@ def test_deploy_model(mxnet_training_job, sagemaker_session, mxnet_full_version)
assert "Could not find model" in str(exception.value)


@pytest.mark.skip(
reason="This test has always failed, but the failure was masked by a bug. "
"This test should be fixed. Details in https://github.com/aws/sagemaker-python-sdk/pull/968"
)
def test_deploy_model_with_tags_and_kms(mxnet_training_job, sagemaker_session, mxnet_full_version):
endpoint_name = "test-mxnet-deploy-model-{}".format(sagemaker_timestamp())

Expand All @@ -123,18 +119,20 @@ def test_deploy_model_with_tags_and_kms(mxnet_training_job, sagemaker_session, m

model.deploy(1, "ml.m4.xlarge", endpoint_name=endpoint_name, tags=tags, kms_key=kms_key_arn)

returned_model = sagemaker_session.describe_model(EndpointName=model.name)
returned_model_tags = sagemaker_session.list_tags(ResourceArn=returned_model["ModelArn"])[
"Tags"
]
returned_model = sagemaker_session.sagemaker_client.describe_model(ModelName=model.name)
returned_model_tags = sagemaker_session.sagemaker_client.list_tags(
ResourceArn=returned_model["ModelArn"]
)["Tags"]

endpoint = sagemaker_session.describe_endpoint(EndpointName=endpoint_name)
endpoint_tags = sagemaker_session.list_tags(ResourceArn=endpoint["EndpointArn"])["Tags"]
endpoint = sagemaker_session.sagemaker_client.describe_endpoint(EndpointName=endpoint_name)
endpoint_tags = sagemaker_session.sagemaker_client.list_tags(
ResourceArn=endpoint["EndpointArn"]
)["Tags"]

endpoint_config = sagemaker_session.describe_endpoint_config(
endpoint_config = sagemaker_session.sagemaker_client.describe_endpoint_config(
EndpointConfigName=endpoint["EndpointConfigName"]
)
endpoint_config_tags = sagemaker_session.list_tags(
endpoint_config_tags = sagemaker_session.sagemaker_client.list_tags(
ResourceArn=endpoint_config["EndpointConfigArn"]
)["Tags"]

Expand All @@ -148,10 +146,6 @@ def test_deploy_model_with_tags_and_kms(mxnet_training_job, sagemaker_session, m
assert endpoint_config["KmsKeyId"] == kms_key_arn


@pytest.mark.skip(
reason="This test has always failed, but the failure was masked by a bug. "
"This test should be fixed. Details in https://github.com/aws/sagemaker-python-sdk/pull/968"
)
def test_deploy_model_with_update_endpoint(
mxnet_training_job, sagemaker_session, mxnet_full_version
):
Expand All @@ -172,26 +166,37 @@ def test_deploy_model_with_update_endpoint(
framework_version=mxnet_full_version,
)
model.deploy(1, "ml.t2.medium", endpoint_name=endpoint_name)
old_endpoint = sagemaker_session.describe_endpoint(EndpointName=endpoint_name)
old_endpoint = sagemaker_session.sagemaker_client.describe_endpoint(
EndpointName=endpoint_name
)
old_config_name = old_endpoint["EndpointConfigName"]

model.deploy(1, "ml.m4.xlarge", update_endpoint=True, endpoint_name=endpoint_name)
new_endpoint = sagemaker_session.describe_endpoint(EndpointName=endpoint_name)[
"ProductionVariants"
]
new_production_variants = new_endpoint["ProductionVariants"]

# Wait for endpoint to finish updating
max_retry_count = 40 # Endpoint update takes ~7min. 40 retries * 30s sleeps = 20min timeout
current_retry_count = 0
while current_retry_count <= max_retry_count:
if current_retry_count >= max_retry_count:
raise Exception("Endpoint status not 'InService' within expected timeout.")
time.sleep(30)
new_endpoint = sagemaker_session.sagemaker_client.describe_endpoint(
EndpointName=endpoint_name
)
current_retry_count += 1
if new_endpoint["EndpointStatus"] == "InService":
break

new_config_name = new_endpoint["EndpointConfigName"]
new_config = sagemaker_session.sagemaker_client.describe_endpoint_config(
EndpointConfigName=new_config_name
)

assert old_config_name != new_config_name
assert new_production_variants["InstanceType"] == "ml.m4.xlarge"
assert new_production_variants["InitialInstanceCount"] == 1
assert new_production_variants["AcceleratorType"] is None
assert new_config["ProductionVariants"][0]["InstanceType"] == "ml.m4.xlarge"
assert new_config["ProductionVariants"][0]["InitialInstanceCount"] == 1


@pytest.mark.skip(
reason="This test has always failed, but the failure was masked by a bug. "
"This test should be fixed. Details in https://github.com/aws/sagemaker-python-sdk/pull/968"
)
def test_deploy_model_with_update_non_existing_endpoint(
mxnet_training_job, sagemaker_session, mxnet_full_version
):
Expand All @@ -216,7 +221,7 @@ def test_deploy_model_with_update_non_existing_endpoint(
framework_version=mxnet_full_version,
)
model.deploy(1, "ml.t2.medium", endpoint_name=endpoint_name)
sagemaker_session.describe_endpoint(EndpointName=endpoint_name)
sagemaker_session.sagemaker_client.describe_endpoint(EndpointName=endpoint_name)

with pytest.raises(ValueError, message=expected_error_message):
model.deploy(
Expand Down
8 changes: 1 addition & 7 deletions tests/integ/test_tf_script_mode.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,10 +127,6 @@ def test_mnist_distributed(sagemaker_session, instance_type):
)


@pytest.mark.skip(
reason="This test has always failed, but the failure was masked by a bug. "
"This test should be fixed. Details in https://github.com/aws/sagemaker-python-sdk/pull/968"
)
def test_mnist_async(sagemaker_session):
estimator = TensorFlow(
entry_point=SCRIPT,
Expand Down Expand Up @@ -168,9 +164,7 @@ def test_mnist_async(sagemaker_session):
result = predictor.predict(np.zeros(784))
print("predict result: {}".format(result))
_assert_endpoint_tags_match(sagemaker_session.sagemaker_client, predictor.endpoint, TAGS)
_assert_model_tags_match(
sagemaker_session.sagemaker_client, estimator.latest_training_job.name, TAGS
)
_assert_model_tags_match(sagemaker_session.sagemaker_client, model_name, TAGS)
_assert_model_name_match(sagemaker_session.sagemaker_client, endpoint_name, model_name)


Expand Down
4 changes: 0 additions & 4 deletions tests/integ/test_tuner.py
Original file line number Diff line number Diff line change
Expand Up @@ -803,10 +803,6 @@ def test_tuning_chainer(sagemaker_session):


@pytest.mark.canary_quick
@pytest.mark.skip(
reason="This test has always failed, but the failure was masked by a bug. "
"This test should be fixed. Details in https://github.com/aws/sagemaker-python-sdk/pull/968"
)
def test_attach_tuning_pytorch(sagemaker_session):
mnist_dir = os.path.join(DATA_DIR, "pytorch_mnist")
mnist_script = os.path.join(mnist_dir, "mnist.py")
Expand Down