
Commit 730cff6

Merge pull request aws#37 from awslabs/kmeans_regional_images
Kmeans regional images
2 parents d513837 + d0a5bf1 commit 730cff6

2 files changed (+38, -34 lines)

sagemaker-python-sdk/1P_kmeans_highlevel/kmeans_mnist.ipynb

Lines changed: 7 additions & 12 deletions
@@ -43,7 +43,7 @@
     "\n",
     "Here we set up the linkage and authentication to AWS services. There are three parts to this:\n",
     "\n",
-    "1. The credentials and region for the account that's running training. Upload the credentials in the normal AWS credentials file format using the jupyter upload feature. The region must always be `us-west-2` during the Beta program.\n",
+    "1. The credentials and region for the account that's running training. Upload the credentials in the normal AWS credentials file format using the jupyter upload feature.\n",
     "2. The roles used to give learning and hosting access to your data. See the documentation for how to specify these.\n",
     "3. The S3 bucket that you want to use for training and model data.\n",
     "\n",
@@ -68,8 +68,9 @@
     "metadata": {},
     "outputs": [],
     "source": [
-    "output_location = 's3://{}/kmeans_highlevel_example/output'.format(bucket)\n",
-    "data_location = 's3://{}/kmeans_highlevel_example/data'.format(bucket)\n",
+    "data_key = 'kmeans_example/data'\n",
+    "data_location = 's3://{}/{}'.format(bucket, data_key)\n",
+    "output_location = 's3://{}/kmeans_example/output'.format(bucket)\n",
     "\n",
     "print('training data will be uploaded to: {}'.format(data_location))\n",
     "print('training artifacts will be uploaded to: {}'.format(output_location))"
@@ -130,13 +131,6 @@
     "show_digit(train_set[0][30], 'This is a {}'.format(train_set[1][30]))"
    ]
   },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Upload training data"
-   ]
-  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -292,9 +286,10 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "import sagemaker\n",
+    "# Uncomment and run to delete\n",
     "\n",
-    "sagemaker.Session().delete_endpoint(kmeans_predictor.endpoint)"
+    "#import sagemaker\n",
+    "#sagemaker.Session().delete_endpoint(kmeans_predictor.endpoint)"
    ]
   },
   {

sagemaker-python-sdk/1P_kmeans_lowlevel/kmeans_mnist_lowlevel.ipynb

Lines changed: 31 additions & 22 deletions
@@ -62,6 +62,20 @@
     "bucket='<bucket-name>'"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data_key = 'kmeans_example/data'\n",
+    "data_location = 's3://{}/{}'.format(bucket, data_key)\n",
+    "output_location = 's3://{}/kmeans_example/output'.format(bucket)\n",
+    "\n",
+    "print('training data will be uploaded to: {}'.format(data_location))\n",
+    "print('training artifacts will be uploaded to: {}'.format(output_location))"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -121,7 +135,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### Data conversion\n",
+    "### Data conversion and upload\n",
     "\n",
     "Since algorithms have particular input and output requirements, converting the dataset is also part of the process that a data scientist goes through prior to initiating training. In this particular case, the hosted implementation of k-means takes recordio-wrapped protobuf, where the data we have today is a pickle-ized numpy array on disk.\n",
     "\n",
@@ -140,27 +154,14 @@
     "%%time\n",
     "from sagemaker.amazon.common import write_numpy_to_dense_tensor\n",
     "import io\n",
+    "import boto3\n",
     "\n",
     "# Convert the training data into the format required by the SageMaker KMeans algorithm\n",
     "buf = io.BytesIO()\n",
     "write_numpy_to_dense_tensor(buf, train_set[0], train_set[1])\n",
-    "buf.seek(0)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "%%time\n",
-    "\n",
-    "import boto3\n",
+    "buf.seek(0)\n",
     "\n",
-    "key = 'kmeans_lowlevel_example/data'\n",
-    "boto3.resource('s3').Bucket(bucket).Object(key).upload_fileobj(buf)\n",
-    "s3_train_data = 's3://{}/{}'.format(bucket, key)\n",
-    "print('uploaded training data location: {}'.format(s3_train_data))"
+    "boto3.resource('s3').Bucket(bucket).Object(data_key).upload_fileobj(buf)"
    ]
   },
   {
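
Pieced together outside the diff, the conversion-and-upload cell this hunk produces amounts to roughly the sketch below. It assumes train_set has already been loaded from the pickled MNIST file and that bucket and data_key are defined as in the earlier setup cells; write_numpy_to_dense_tensor is the SageMaker SDK helper the notebook already uses.

    import io
    import boto3
    from sagemaker.amazon.common import write_numpy_to_dense_tensor

    # Convert the numpy features and labels into recordio-wrapped protobuf in memory
    buf = io.BytesIO()
    write_numpy_to_dense_tensor(buf, train_set[0], train_set[1])
    buf.seek(0)

    # Upload the converted data to the S3 key the training job reads from
    boto3.resource('s3').Bucket(bucket).Object(data_key).upload_fileobj(buf)
    print('training data uploaded to: s3://{}/{}'.format(bucket, data_key))
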
@@ -187,15 +188,21 @@
     "job_name = 'kmeans-lowlevel-' + strftime(\"%Y-%m-%d-%H-%M-%S\", gmtime())\n",
     "print(\"Training job\", job_name)\n",
     "\n",
+    "images = {'us-west-2': '174872318107.dkr.ecr.us-west-2.amazonaws.com/kmeans:latest',\n",
+    "          'us-east-1': '382416733822.dkr.ecr.us-east-1.amazonaws.com/kmeans:latest',\n",
+    "          'us-east-2': '404615174143.dkr.ecr.us-east-2.amazonaws.com/kmeans:latest',\n",
+    "          'eu-west-1': '438346466558.dkr.ecr.eu-west-1.amazonaws.com/kmeans:latest'}\n",
+    "image = images[boto3.Session().region_name]\n",
+    "\n",
     "create_training_params = \\\n",
     "{\n",
     "    \"AlgorithmSpecification\": {\n",
-    "        \"TrainingImage\": \"174872318107.dkr.ecr.us-west-2.amazonaws.com/kmeans:1\",\n",
+    "        \"TrainingImage\": image,\n",
     "        \"TrainingInputMode\": \"File\"\n",
     "    },\n",
     "    \"RoleArn\": role,\n",
     "    \"OutputDataConfig\": {\n",
-    "        \"S3OutputPath\": \"s3://{}/kmeans_lowlevel_example/output\".format(bucket)\n",
+    "        \"S3OutputPath\": output_location\n",
     "    },\n",
     "    \"ResourceConfig\": {\n",
     "        \"InstanceCount\": 2,\n",
@@ -218,7 +225,7 @@
     "            \"DataSource\": {\n",
     "                \"S3DataSource\": {\n",
     "                    \"S3DataType\": \"S3Prefix\",\n",
-    "                    \"S3Uri\": s3_train_data,\n",
+    "                    \"S3Uri\": data_location,\n",
     "                    \"S3DataDistributionType\": \"FullyReplicated\"\n",
     "                }\n",
     "            },\n",
@@ -278,7 +285,7 @@
     "model_data = info['ModelArtifacts']['S3ModelArtifacts']\n",
     "\n",
     "primary_container = {\n",
-    "    'Image': \"174872318107.dkr.ecr.us-west-2.amazonaws.com/kmeans:1\",\n",
+    "    'Image': image,\n",
     "    'ModelDataUrl': model_data\n",
     "}\n",
     "\n",
@@ -474,7 +481,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "sagemaker.delete_endpoint(EndpointName=endpoint_name)"
+    "# Uncomment and run to delete\n",
+    "\n",
+    "# sagemaker.delete_endpoint(EndpointName=endpoint_name)"
    ]
   },
   {
