Skip to content

Commit e864ea3

Browse files
committed
change: fix integration test failures masked by timeout bug
For background on the original issue, see aws#968.
1 parent b7a2b9c commit e864ea3

File tree

5 files changed

+38
-48
lines changed

5 files changed

+38
-48
lines changed

tests/integ/test_ipinsights.py

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,6 @@
2626
FEATURE_DIM = None
2727

2828

29-
@pytest.mark.skip(
30-
reason="This test has always failed, but the failure was masked by a bug. "
31-
"This test should be fixed. Details in https://github.com/aws/sagemaker-python-sdk/pull/968"
32-
)
3329
def test_ipinsights(sagemaker_session):
3430
job_name = unique_name_from_base("ipinsights")
3531

@@ -64,6 +60,5 @@ def test_ipinsights(sagemaker_session):
6460
predict_input = [["user_1", "1.1.1.1"]]
6561
result = predictor.predict(predict_input)
6662

67-
assert len(result) == 1
68-
for record in result:
69-
assert record.label["dot_product"] is not None
63+
assert len(result["predictions"]) == 1
64+
assert result["predictions"][0]["dot_product"] is not None

tests/integ/test_marketplace.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@
3535
# Pre-Trained Model: Scikit Decision Trees - Pretrained Model
3636
# https://aws.amazon.com/marketplace/pp/prodview-7qop4x5ahrdhe
3737
#
38-
# Both are written by Amazon and are free to subscribe.
38+
# Both are written by Amazon and are free to subscribe.
3939

4040
ALGORITHM_ARN = (
4141
"arn:aws:sagemaker:%s:%s:algorithm/scikit-decision-trees-"

tests/integ/test_mxnet_train.py

Lines changed: 34 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -96,10 +96,6 @@ def test_deploy_model(mxnet_training_job, sagemaker_session, mxnet_full_version)
9696
assert "Could not find model" in str(exception.value)
9797

9898

99-
@pytest.mark.skip(
100-
reason="This test has always failed, but the failure was masked by a bug. "
101-
"This test should be fixed. Details in https://github.com/aws/sagemaker-python-sdk/pull/968"
102-
)
10399
def test_deploy_model_with_tags_and_kms(mxnet_training_job, sagemaker_session, mxnet_full_version):
104100
endpoint_name = "test-mxnet-deploy-model-{}".format(sagemaker_timestamp())
105101

@@ -123,18 +119,20 @@ def test_deploy_model_with_tags_and_kms(mxnet_training_job, sagemaker_session, m
123119

124120
model.deploy(1, "ml.m4.xlarge", endpoint_name=endpoint_name, tags=tags, kms_key=kms_key_arn)
125121

126-
returned_model = sagemaker_session.describe_model(EndpointName=model.name)
127-
returned_model_tags = sagemaker_session.list_tags(ResourceArn=returned_model["ModelArn"])[
128-
"Tags"
129-
]
122+
returned_model = sagemaker_session.sagemaker_client.describe_model(ModelName=model.name)
123+
returned_model_tags = sagemaker_session.sagemaker_client.list_tags(
124+
ResourceArn=returned_model["ModelArn"]
125+
)["Tags"]
130126

131-
endpoint = sagemaker_session.describe_endpoint(EndpointName=endpoint_name)
132-
endpoint_tags = sagemaker_session.list_tags(ResourceArn=endpoint["EndpointArn"])["Tags"]
127+
endpoint = sagemaker_session.sagemaker_client.describe_endpoint(EndpointName=endpoint_name)
128+
endpoint_tags = sagemaker_session.sagemaker_client.list_tags(
129+
ResourceArn=endpoint["EndpointArn"]
130+
)["Tags"]
133131

134-
endpoint_config = sagemaker_session.describe_endpoint_config(
132+
endpoint_config = sagemaker_session.sagemaker_client.describe_endpoint_config(
135133
EndpointConfigName=endpoint["EndpointConfigName"]
136134
)
137-
endpoint_config_tags = sagemaker_session.list_tags(
135+
endpoint_config_tags = sagemaker_session.sagemaker_client.list_tags(
138136
ResourceArn=endpoint_config["EndpointConfigArn"]
139137
)["Tags"]
140138

@@ -148,10 +146,6 @@ def test_deploy_model_with_tags_and_kms(mxnet_training_job, sagemaker_session, m
148146
assert endpoint_config["KmsKeyId"] == kms_key_arn
149147

150148

151-
@pytest.mark.skip(
152-
reason="This test has always failed, but the failure was masked by a bug. "
153-
"This test should be fixed. Details in https://github.com/aws/sagemaker-python-sdk/pull/968"
154-
)
155149
def test_deploy_model_with_update_endpoint(
156150
mxnet_training_job, sagemaker_session, mxnet_full_version
157151
):
@@ -172,26 +166,37 @@ def test_deploy_model_with_update_endpoint(
172166
framework_version=mxnet_full_version,
173167
)
174168
model.deploy(1, "ml.t2.medium", endpoint_name=endpoint_name)
175-
old_endpoint = sagemaker_session.describe_endpoint(EndpointName=endpoint_name)
169+
old_endpoint = sagemaker_session.sagemaker_client.describe_endpoint(
170+
EndpointName=endpoint_name
171+
)
176172
old_config_name = old_endpoint["EndpointConfigName"]
177173

178174
model.deploy(1, "ml.m4.xlarge", update_endpoint=True, endpoint_name=endpoint_name)
179-
new_endpoint = sagemaker_session.describe_endpoint(EndpointName=endpoint_name)[
180-
"ProductionVariants"
181-
]
182-
new_production_variants = new_endpoint["ProductionVariants"]
175+
176+
# Wait for endpoint to finish updating
177+
max_retry_count = 40 # Endpoint update takes ~7min. 40 retries * 30s sleeps = 20min timeout
178+
current_retry_count = 0
179+
while current_retry_count <= max_retry_count:
180+
if current_retry_count >= max_retry_count:
181+
raise Exception("Endpoint status not 'InService' within expected timeout.")
182+
time.sleep(30)
183+
new_endpoint = sagemaker_session.sagemaker_client.describe_endpoint(
184+
EndpointName=endpoint_name
185+
)
186+
current_retry_count += 1
187+
if new_endpoint["EndpointStatus"] == "InService":
188+
break
189+
183190
new_config_name = new_endpoint["EndpointConfigName"]
191+
new_config = sagemaker_session.sagemaker_client.describe_endpoint_config(
192+
EndpointConfigName=new_config_name
193+
)
184194

185195
assert old_config_name != new_config_name
186-
assert new_production_variants["InstanceType"] == "ml.m4.xlarge"
187-
assert new_production_variants["InitialInstanceCount"] == 1
188-
assert new_production_variants["AcceleratorType"] is None
196+
assert new_config["ProductionVariants"][0]["InstanceType"] == "ml.m4.xlarge"
197+
assert new_config["ProductionVariants"][0]["InitialInstanceCount"] == 1
189198

190199

191-
@pytest.mark.skip(
192-
reason="This test has always failed, but the failure was masked by a bug. "
193-
"This test should be fixed. Details in https://github.com/aws/sagemaker-python-sdk/pull/968"
194-
)
195200
def test_deploy_model_with_update_non_existing_endpoint(
196201
mxnet_training_job, sagemaker_session, mxnet_full_version
197202
):
@@ -216,7 +221,7 @@ def test_deploy_model_with_update_non_existing_endpoint(
216221
framework_version=mxnet_full_version,
217222
)
218223
model.deploy(1, "ml.t2.medium", endpoint_name=endpoint_name)
219-
sagemaker_session.describe_endpoint(EndpointName=endpoint_name)
224+
sagemaker_session.sagemaker_client.describe_endpoint(EndpointName=endpoint_name)
220225

221226
with pytest.raises(ValueError, message=expected_error_message):
222227
model.deploy(

tests/integ/test_tf_script_mode.py

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -127,10 +127,6 @@ def test_mnist_distributed(sagemaker_session, instance_type):
127127
)
128128

129129

130-
@pytest.mark.skip(
131-
reason="This test has always failed, but the failure was masked by a bug. "
132-
"This test should be fixed. Details in https://github.com/aws/sagemaker-python-sdk/pull/968"
133-
)
134130
def test_mnist_async(sagemaker_session):
135131
estimator = TensorFlow(
136132
entry_point=SCRIPT,
@@ -168,9 +164,7 @@ def test_mnist_async(sagemaker_session):
168164
result = predictor.predict(np.zeros(784))
169165
print("predict result: {}".format(result))
170166
_assert_endpoint_tags_match(sagemaker_session.sagemaker_client, predictor.endpoint, TAGS)
171-
_assert_model_tags_match(
172-
sagemaker_session.sagemaker_client, estimator.latest_training_job.name, TAGS
173-
)
167+
_assert_model_tags_match(sagemaker_session.sagemaker_client, model_name, TAGS)
174168
_assert_model_name_match(sagemaker_session.sagemaker_client, endpoint_name, model_name)
175169

176170

tests/integ/test_tuner.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -803,10 +803,6 @@ def test_tuning_chainer(sagemaker_session):
803803

804804

805805
@pytest.mark.canary_quick
806-
@pytest.mark.skip(
807-
reason="This test has always failed, but the failure was masked by a bug. "
808-
"This test should be fixed. Details in https://github.com/aws/sagemaker-python-sdk/pull/968"
809-
)
810806
def test_attach_tuning_pytorch(sagemaker_session):
811807
mnist_dir = os.path.join(DATA_DIR, "pytorch_mnist")
812808
mnist_script = os.path.join(mnist_dir, "mnist.py")

0 commit comments

Comments
 (0)