Skip to content

Commit 920c072

Browse files
dlraghaajaykarpur
authored and committed
fix: multi model integration test to create ECR repo with unique names to allow independent parallel executions (#1172)
1 parent 967ec94 commit 920c072

File tree

1 file changed

+30
-8
lines changed

1 file changed

+30
-8
lines changed

tests/integ/test_multidatamodel.py

Lines changed: 30 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,9 @@
1414

1515
import base64
1616
import os
17+
import requests
1718

19+
import botocore
1820
import docker
1921
import numpy
2022
import pytest
@@ -30,7 +32,6 @@
3032
from tests.integ.retry import retries
3133
from tests.integ.timeout import timeout, timeout_and_delete_endpoint_by_name
3234

33-
ALGORITHM_NAME = "sagemaker-multimodel-integ-test"
3435
ROLE = "SageMakerRole"
3536
PRETRAINED_MODEL_PATH_1 = "customer_a/dummy_model.tar.gz"
3637
PRETRAINED_MODEL_PATH_2 = "customer_b/dummy_model.tar.gz"
@@ -47,27 +48,36 @@ def container_image(sagemaker_session):
4748
"sts", region_name=region, endpoint_url=utils.sts_regional_endpoint(region)
4849
)
4950
account_id = sts_client.get_caller_identity()["Account"]
51+
algorithm_name = "sagemaker-multimodel-integ-test-{}".format(sagemaker_timestamp())
5052
ecr_image = "{account}.dkr.ecr.{region}.amazonaws.com/{algorithm_name}:latest".format(
51-
account=account_id, region=region, algorithm_name=ALGORITHM_NAME
53+
account=account_id, region=region, algorithm_name=algorithm_name
5254
)
5355

5456
# Build and tag docker image locally
5557
docker_client = docker.from_env()
5658
image, build_log = docker_client.images.build(
57-
path=os.path.join(DATA_DIR, "multimodel", "container"), tag=ALGORITHM_NAME, rm=True
59+
path=os.path.join(DATA_DIR, "multimodel", "container"), tag=algorithm_name, rm=True
5860
)
5961
image.tag(ecr_image, tag="latest")
6062

6163
# Create AWS ECR and push the local docker image to it
62-
_create_repository(ecr_client, ALGORITHM_NAME)
64+
_create_repository(ecr_client, algorithm_name)
6365
username, password = _ecr_login(ecr_client)
64-
docker_client.images.push(ecr_image, auth_config={"username": username, "password": password})
66+
# Retry docker image push
67+
for _ in retries(3, "Upload docker image to ECR repo", seconds_to_sleep=10):
68+
try:
69+
docker_client.images.push(
70+
ecr_image, auth_config={"username": username, "password": password}
71+
)
72+
break
73+
except requests.exceptions.ConnectionError:
74+
# This can happen when we try to create multiple repositories in parallel, so we retry
75+
pass
76+
6577
yield ecr_image
6678

6779
# Delete repository after the multi model integration tests complete
68-
repo = ecr_client.describe_repositories(repositoryNames=[ALGORITHM_NAME])
69-
if "repositories" in repo:
70-
ecr_client.delete_repository(repositoryName=ALGORITHM_NAME, force=True)
80+
_delete_repository(ecr_client, algorithm_name)
7181

7282

7383
def _create_repository(ecr_client, repository_name):
@@ -87,6 +97,18 @@ def _create_repository(ecr_client, repository_name):
8797
raise
8898

8999

100+
def _delete_repository(ecr_client, repository_name):
101+
"""
102+
Deletes an ECS Repository (ECR). After the integration test completes
103+
we will remove the repository created during setup
104+
"""
105+
try:
106+
ecr_client.describe_repositories(repositoryNames=[repository_name])
107+
ecr_client.delete_repository(repositoryName=repository_name, force=True)
108+
except botocore.errorfactory.ResourceNotFoundException:
109+
pass
110+
111+
90112
def _ecr_login(ecr_client):
91113
""" Get a login credentials for an ecr client.
92114
"""

0 commit comments

Comments
 (0)