-
Notifications
You must be signed in to change notification settings - Fork 1.2k
fix: change AMI ids in tests to be dynamic based on regions #1004
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 6 commits
a877ca9
2ca2c9e
a4ec855
1bdf0ad
c43ff38
f41d3c5
0393384
6b6e070
b80ccbc
6a95b89
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,6 +14,7 @@ | |
|
||
import collections | ||
import logging | ||
from operator import itemgetter | ||
import os | ||
from os import path | ||
import stat | ||
|
@@ -31,9 +32,7 @@ | |
PREFIX = "ec2_fs_key_" | ||
KEY_NAME = PREFIX + str(uuid.uuid4().hex.upper()[0:8]) | ||
ROLE_NAME = "SageMakerRole" | ||
REGION = "us-west-2" | ||
EC2_INSTANCE_TYPE = "t2.micro" | ||
AMI_ID = "ami-082b5a644766e0e6f" | ||
MIN_COUNT = 1 | ||
MAX_COUNT = 1 | ||
|
||
|
@@ -69,12 +68,13 @@ def set_up_efs_fsx(sagemaker_session): | |
_check_or_create_key_pair(sagemaker_session) | ||
_check_or_create_iam_profile_and_attach_role(sagemaker_session) | ||
subnet_ids, security_group_ids = check_or_create_vpc_resources_efs_fsx( | ||
sagemaker_session, REGION, VPC_NAME | ||
sagemaker_session, VPC_NAME | ||
) | ||
|
||
ami_id = _dynamic_ami_id(sagemaker_session) | ||
ec2_instance = _create_ec2_instance( | ||
sagemaker_session, | ||
AMI_ID, | ||
ami_id, | ||
EC2_INSTANCE_TYPE, | ||
KEY_NAME, | ||
MIN_COUNT, | ||
|
@@ -100,16 +100,35 @@ def set_up_efs_fsx(sagemaker_session): | |
mount_efs_target_id, | ||
) | ||
|
||
region = sagemaker_session.boto_region_name | ||
try: | ||
connected_instance = _connect_ec2_instance(ec2_instance) | ||
_upload_data_and_mount_fs(connected_instance, file_system_efs_id, file_system_fsx_id) | ||
_upload_data_and_mount_fs( | ||
connected_instance, file_system_efs_id, file_system_fsx_id, region | ||
) | ||
except Exception: | ||
tear_down(sagemaker_session, fs_resources) | ||
raise | ||
|
||
return fs_resources | ||
|
||
|
||
def _dynamic_ami_id(sagemaker_session): | ||
ec2_client = sagemaker_session.boto_session.client("ec2") | ||
filters = [ | ||
{"Name": "name", "Values": ["amzn-ami-hvm-????.??.?.????????-x86_64-gp2"]}, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. let's make this a constant. that'll both make it easier to find if it needs to be changed later for whatever reason and help describe what the string is. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sure |
||
{"Name": "state", "Values": ["available"]}, | ||
] | ||
response = ec2_client.describe_images(Filters=filters) | ||
|
||
image_details = sorted(response["Images"], key=itemgetter("CreationDate"), reverse=True) | ||
if len(image_details) > 0: | ||
ami_id = image_details[0]["ImageId"] | ||
return ami_id | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: you can combine ll. 126-127 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sure |
||
else: | ||
raise Exception("AMI was not found based on current search criteria: {}".format(filters)) | ||
|
||
|
||
def _connect_ec2_instance(ec2_instance): | ||
public_ip_address = ec2_instance.public_ip_address | ||
connected_instance = Connection( | ||
|
@@ -118,7 +137,7 @@ def _connect_ec2_instance(ec2_instance): | |
return connected_instance | ||
|
||
|
||
def _upload_data_and_mount_fs(connected_instance, file_system_efs_id, file_system_fsx_id): | ||
def _upload_data_and_mount_fs(connected_instance, file_system_efs_id, file_system_fsx_id, region): | ||
connected_instance.put(FS_MOUNT_SCRIPT, ".") | ||
connected_instance.run("mkdir temp_tf; mkdir temp_one_p", in_stream=False) | ||
for dir_name, subdir_list, file_list in os.walk(MNIST_LOCAL_DATA): | ||
|
@@ -127,7 +146,7 @@ def _upload_data_and_mount_fs(connected_instance, file_system_efs_id, file_syste | |
connected_instance.put(local_file, "temp_tf/") | ||
connected_instance.put(ONE_P_LOCAL_DATA, "temp_one_p/") | ||
connected_instance.run( | ||
"sudo sh fs_mount_setup.sh {} {}".format(file_system_efs_id, file_system_fsx_id), | ||
"sudo sh fs_mount_setup.sh {} {} {}".format(file_system_efs_id, file_system_fsx_id, region), | ||
in_stream=False, | ||
) | ||
|
||
|
@@ -168,7 +187,7 @@ def _check_or_create_efs(sagemaker_session): | |
|
||
def _create_efs_mount(sagemaker_session, file_system_id): | ||
subnet_ids, security_group_ids = check_or_create_vpc_resources_efs_fsx( | ||
sagemaker_session, REGION, VPC_NAME | ||
sagemaker_session, VPC_NAME | ||
) | ||
efs_client = sagemaker_session.boto_session.client("efs") | ||
mount_response = efs_client.create_mount_target( | ||
|
@@ -188,7 +207,7 @@ def _create_efs_mount(sagemaker_session, file_system_id): | |
def _check_or_create_fsx(sagemaker_session): | ||
fsx_client = sagemaker_session.boto_session.client("fsx") | ||
subnet_ids, security_group_ids = check_or_create_vpc_resources_efs_fsx( | ||
sagemaker_session, REGION, VPC_NAME | ||
sagemaker_session, VPC_NAME | ||
) | ||
create_response = fsx_client.create_file_system( | ||
FileSystemType="LUSTRE", | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,18 +16,19 @@ | |
# Mounting EFS and FSx for Lustre file systems for integration Tests | ||
FILE_SYSTEM_EFS_ID=$1 | ||
FILE_SYSTEM_FSX_ID=$2 | ||
REGION=$3 | ||
|
||
echo "Mounting EFS File Systems" | ||
sudo yum install -y amazon-efs-utils.noarch 0:1.10-1.amzn2 | ||
sudo yum install -y amazon-efs-utils | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. do different regions have different versions released? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The list for the AMI ids are Amazon Linux AMI 2018, while the old hard-coded one is Amazon Linux2 AMI. |
||
sudo mkdir efs | ||
sudo mount -t efs "$FILE_SYSTEM_EFS_ID":/ efs | ||
sudo mkdir efs/tensorflow | ||
sudo mkdir efs/one_p_mnist | ||
|
||
echo "Mounting FSx for Lustre File System" | ||
sudo amazon-linux-extras install -y lustre2.10 | ||
sudo yum install -y lustre-client | ||
sudo mkdir -p /mnt/fsx | ||
sudo mount -t lustre -o noatime,flock "$FILE_SYSTEM_FSX_ID".fsx.us-west-2.amazonaws.com@tcp:/fsx /mnt/fsx | ||
sudo mount -t lustre -o noatime,flock "$FILE_SYSTEM_FSX_ID".fsx."$REGION".amazonaws.com@tcp:/fsx /mnt/fsx | ||
sudo mkdir /mnt/fsx/tensorflow | ||
sudo mkdir /mnt/fsx/one_p_mnist | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
let's rename this to
_ami_id_for_region