Skip to content

Commit d0a5bf1

Browse files
committed
Make beginning cells identical
1 parent 33a0549 commit d0a5bf1

File tree

2 files changed

+23
-28
lines changed

2 files changed

+23
-28
lines changed

sagemaker-python-sdk/1P_kmeans_highlevel/kmeans_mnist.ipynb

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -68,8 +68,9 @@
6868
"metadata": {},
6969
"outputs": [],
7070
"source": [
71-
"output_location = 's3://{}/kmeans_highlevel_example/output'.format(bucket)\n",
72-
"data_location = 's3://{}/kmeans_highlevel_example/data'.format(bucket)\n",
71+
"data_key = 'kmeans_example/data'\n",
72+
"data_location = 's3://{}/{}'.format(bucket, data_key)\n",
73+
"output_location = 's3://{}/kmeans_example/output'.format(bucket)\n",
7374
"\n",
7475
"print('training data will be uploaded to: {}'.format(data_location))\n",
7576
"print('training artifacts will be uploaded to: {}'.format(output_location))"
@@ -130,13 +131,6 @@
130131
"show_digit(train_set[0][30], 'This is a {}'.format(train_set[1][30]))"
131132
]
132133
},
133-
{
134-
"cell_type": "markdown",
135-
"metadata": {},
136-
"source": [
137-
"## Upload training data"
138-
]
139-
},
140134
{
141135
"cell_type": "markdown",
142136
"metadata": {},

sagemaker-python-sdk/1P_kmeans_lowlevel/kmeans_mnist_lowlevel.ipynb

Lines changed: 20 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,20 @@
6262
"bucket='<bucket-name>'"
6363
]
6464
},
65+
{
66+
"cell_type": "code",
67+
"execution_count": null,
68+
"metadata": {},
69+
"outputs": [],
70+
"source": [
71+
"data_key = 'kmeans_example/data'\n",
72+
"data_location = 's3://{}/{}'.format(bucket, data_key)\n",
73+
"output_location = 's3://{}/kmeans_example/output'.format(bucket)\n",
74+
"\n",
75+
"print('training data will be uploaded to: {}'.format(data_location))\n",
76+
"print('training artifacts will be uploaded to: {}'.format(output_location))"
77+
]
78+
},
6579
{
6680
"cell_type": "markdown",
6781
"metadata": {},
@@ -121,7 +135,7 @@
121135
"cell_type": "markdown",
122136
"metadata": {},
123137
"source": [
124-
"### Data conversion\n",
138+
"### Data conversion and upload\n",
125139
"\n",
126140
"Since algorithms have particular input and output requirements, converting the dataset is also part of the process that a data scientist goes through prior to initiating training. In this particular case, the hosted implementation of k-means takes recordio-wrapped protobuf, where the data we have today is a pickle-ized numpy array on disk.\n",
127141
"\n",
@@ -140,27 +154,14 @@
140154
"%%time\n",
141155
"from sagemaker.amazon.common import write_numpy_to_dense_tensor\n",
142156
"import io\n",
157+
"import boto3\n",
143158
"\n",
144159
"# Convert the training data into the format required by the SageMaker KMeans algorithm\n",
145160
"buf = io.BytesIO()\n",
146161
"write_numpy_to_dense_tensor(buf, train_set[0], train_set[1])\n",
147-
"buf.seek(0)"
148-
]
149-
},
150-
{
151-
"cell_type": "code",
152-
"execution_count": null,
153-
"metadata": {},
154-
"outputs": [],
155-
"source": [
156-
"%%time\n",
157-
"\n",
158-
"import boto3\n",
162+
"buf.seek(0)\n",
159163
"\n",
160-
"key = 'kmeans_lowlevel_example/data'\n",
161-
"boto3.resource('s3').Bucket(bucket).Object(key).upload_fileobj(buf)\n",
162-
"s3_train_data = 's3://{}/{}'.format(bucket, key)\n",
163-
"print('uploaded training data location: {}'.format(s3_train_data))"
164+
"boto3.resource('s3').Bucket(bucket).Object(data_key).upload_fileobj(buf)"
164165
]
165166
},
166167
{
@@ -201,7 +202,7 @@
201202
" },\n",
202203
" \"RoleArn\": role,\n",
203204
" \"OutputDataConfig\": {\n",
204-
" \"S3OutputPath\": \"s3://{}/kmeans_lowlevel_example/output\".format(bucket)\n",
205+
" \"S3OutputPath\": output_location\n",
205206
" },\n",
206207
" \"ResourceConfig\": {\n",
207208
" \"InstanceCount\": 2,\n",
@@ -224,7 +225,7 @@
224225
" \"DataSource\": {\n",
225226
" \"S3DataSource\": {\n",
226227
" \"S3DataType\": \"S3Prefix\",\n",
227-
" \"S3Uri\": s3_train_data,\n",
228+
" \"S3Uri\": data_location,\n",
228229
" \"S3DataDistributionType\": \"FullyReplicated\"\n",
229230
" }\n",
230231
" },\n",

0 commit comments

Comments
 (0)