|
62 | 62 | "bucket='<bucket-name>'"
|
63 | 63 | ]
|
64 | 64 | },
|
| 65 | + { |
| 66 | + "cell_type": "code", |
| 67 | + "execution_count": null, |
| 68 | + "metadata": {}, |
| 69 | + "outputs": [], |
| 70 | + "source": [ |
| 71 | + "data_key = 'kmeans_example/data'  # S3 object key the converted training data is uploaded to\n", |
| 72 | + "data_location = 's3://{}/{}'.format(bucket, data_key)  # full S3 URI of that object; passed to training as the input S3Uri\n", |
| 73 | + "output_location = 's3://{}/kmeans_example/output'.format(bucket)  # S3 prefix passed to training as S3OutputPath\n", |
| 74 | + "\n", |
| 75 | + "print('training data will be uploaded to: {}'.format(data_location))\n", |
| 76 | + "print('training artifacts will be uploaded to: {}'.format(output_location))" |
| 77 | + ] |
| 78 | + }, |
65 | 79 | {
|
66 | 80 | "cell_type": "markdown",
|
67 | 81 | "metadata": {},
|
|
121 | 135 | "cell_type": "markdown",
|
122 | 136 | "metadata": {},
|
123 | 137 | "source": [
|
124 |
| - "### Data conversion\n", |
| 138 | + "### Data conversion and upload\n", |
125 | 139 | "\n",
|
126 | 140 | "Since algorithms have particular input and output requirements, converting the dataset is also part of the process that a data scientist goes through prior to initiating training. In this particular case, the hosted implementation of k-means takes RecordIO-wrapped protobuf, whereas the data we have today is a pickled NumPy array on disk.\n",
|
127 | 141 | "\n",
|
|
140 | 154 | "%%time\n",
|
141 | 155 | "from sagemaker.amazon.common import write_numpy_to_dense_tensor\n",
|
142 | 156 | "import io\n",
|
| 157 | + "import boto3\n", |
143 | 158 | "\n",
|
144 | 159 | "# Convert the training data into the format required by the SageMaker KMeans algorithm\n",
|
145 | 160 | "buf = io.BytesIO()\n",
|
146 | 161 | "write_numpy_to_dense_tensor(buf, train_set[0], train_set[1])\n",
|
147 |
| - "buf.seek(0)" |
148 |
| - ] |
149 |
| - }, |
150 |
| - { |
151 |
| - "cell_type": "code", |
152 |
| - "execution_count": null, |
153 |
| - "metadata": {}, |
154 |
| - "outputs": [], |
155 |
| - "source": [ |
156 |
| - "%%time\n", |
157 |
| - "\n", |
158 |
| - "import boto3\n", |
| 162 | + "buf.seek(0)  # rewind so the upload below reads the buffer from its start\n", |
159 | 163 | "\n",
|
160 |
| - "key = 'kmeans_lowlevel_example/data'\n", |
161 |
| - "boto3.resource('s3').Bucket(bucket).Object(key).upload_fileobj(buf)\n", |
162 |
| - "s3_train_data = 's3://{}/{}'.format(bucket, key)\n", |
163 |
| - "print('uploaded training data location: {}'.format(s3_train_data))" |
| 164 | + "boto3.resource('s3').Bucket(bucket).Object(data_key).upload_fileobj(buf)  # upload the serialized buffer to s3://<bucket>/<data_key>" |
164 | 165 | ]
|
165 | 166 | },
|
166 | 167 | {
|
|
187 | 188 | "job_name = 'kmeans-lowlevel-' + strftime(\"%Y-%m-%d-%H-%M-%S\", gmtime())\n",
|
188 | 189 | "print(\"Training job\", job_name)\n",
|
189 | 190 | "\n",
|
| 191 | + "images = {'us-west-2': '174872318107.dkr.ecr.us-west-2.amazonaws.com/kmeans:latest',\n", |
| 192 | + " 'us-east-1': '382416733822.dkr.ecr.us-east-1.amazonaws.com/kmeans:latest',\n", |
| 193 | + " 'us-east-2': '404615174143.dkr.ecr.us-east-2.amazonaws.com/kmeans:latest',\n", |
| 194 | + " 'eu-west-1': '438346466558.dkr.ecr.eu-west-1.amazonaws.com/kmeans:latest'}\n", |
| 195 | + "image = images[boto3.Session().region_name]\n", |
| 196 | + "\n", |
190 | 197 | "create_training_params = \\\n",
|
191 | 198 | "{\n",
|
192 | 199 | " \"AlgorithmSpecification\": {\n",
|
193 |
| - " \"TrainingImage\": \"174872318107.dkr.ecr.us-west-2.amazonaws.com/kmeans:1\",\n", |
| 200 | + " \"TrainingImage\": image,\n", |
194 | 201 | " \"TrainingInputMode\": \"File\"\n",
|
195 | 202 | " },\n",
|
196 | 203 | " \"RoleArn\": role,\n",
|
197 | 204 | " \"OutputDataConfig\": {\n",
|
198 |
| - " \"S3OutputPath\": \"s3://{}/kmeans_lowlevel_example/output\".format(bucket)\n", |
| 205 | + " \"S3OutputPath\": output_location\n", |
199 | 206 | " },\n",
|
200 | 207 | " \"ResourceConfig\": {\n",
|
201 | 208 | " \"InstanceCount\": 2,\n",
|
|
218 | 225 | " \"DataSource\": {\n",
|
219 | 226 | " \"S3DataSource\": {\n",
|
220 | 227 | " \"S3DataType\": \"S3Prefix\",\n",
|
221 |
| - " \"S3Uri\": s3_train_data,\n", |
| 228 | + " \"S3Uri\": data_location,\n", |
222 | 229 | " \"S3DataDistributionType\": \"FullyReplicated\"\n",
|
223 | 230 | " }\n",
|
224 | 231 | " },\n",
|
|
278 | 285 | "model_data = info['ModelArtifacts']['S3ModelArtifacts']\n",
|
279 | 286 | "\n",
|
280 | 287 | "primary_container = {\n",
|
281 |
| - " 'Image': \"174872318107.dkr.ecr.us-west-2.amazonaws.com/kmeans:1\",\n", |
| 288 | + " 'Image': image,\n", |
282 | 289 | " 'ModelDataUrl': model_data\n",
|
283 | 290 | "}\n",
|
284 | 291 | "\n",
|
|
474 | 481 | "metadata": {},
|
475 | 482 | "outputs": [],
|
476 | 483 | "source": [
|
477 |
| - "sagemaker.delete_endpoint(EndpointName=endpoint_name)" |
| 484 | + "# Uncomment and run the line below to delete the endpoint named by endpoint_name once you are done with it\n", |
| 485 | + "\n", |
| 486 | + "# sagemaker.delete_endpoint(EndpointName=endpoint_name)" |
478 | 487 | ]
|
479 | 488 | },
|
480 | 489 | {
|
|
0 commit comments