Make beginning cells identical across the high-level and low-level k-means MNIST notebooks

winstonaws · winstonaws · commit d0a5bf19d3a4 · 2017-11-24T15:26:19.000-08:00
diff --git a/sagemaker-python-sdk/1P_kmeans_highlevel/kmeans_mnist.ipynb b/sagemaker-python-sdk/1P_kmeans_highlevel/kmeans_mnist.ipynb
@@ -68,8 +68,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "output_location = 's3://{}/kmeans_highlevel_example/output'.format(bucket)\n",
-    "data_location = 's3://{}/kmeans_highlevel_example/data'.format(bucket)\n",
+    "data_key = 'kmeans_example/data'\n",
+    "data_location = 's3://{}/{}'.format(bucket, data_key)\n",
+    "output_location = 's3://{}/kmeans_example/output'.format(bucket)\n",
     "\n",
     "print('training data will be uploaded to: {}'.format(data_location))\n",
     "print('training artifacts will be uploaded to: {}'.format(output_location))"
@@ -130,13 +131,6 @@
     "show_digit(train_set[0][30], 'This is a {}'.format(train_set[1][30]))"
    ]
   },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Upload training data"
-   ]
-  },
   {
    "cell_type": "markdown",
    "metadata": {},
diff --git a/sagemaker-python-sdk/1P_kmeans_lowlevel/kmeans_mnist_lowlevel.ipynb b/sagemaker-python-sdk/1P_kmeans_lowlevel/kmeans_mnist_lowlevel.ipynb
@@ -62,6 +62,20 @@
     "bucket='<bucket-name>'"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data_key = 'kmeans_example/data'\n",
+    "data_location = 's3://{}/{}'.format(bucket, data_key)\n",
+    "output_location = 's3://{}/kmeans_example/output'.format(bucket)\n",
+    "\n",
+    "print('training data will be uploaded to: {}'.format(data_location))\n",
+    "print('training artifacts will be uploaded to: {}'.format(output_location))"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -121,7 +135,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### Data conversion\n",
+    "### Data conversion and upload\n",
     "\n",
     "Since algorithms have particular input and output requirements, converting the dataset is also part of the process that a data scientist goes through prior to initiating training. In this particular case, the hosted implementation of k-means takes recordio-wrapped protobuf, where the data we have today is a pickle-ized numpy array on disk.\n",
     "\n",
@@ -140,27 +154,14 @@
     "%%time\n",
     "from sagemaker.amazon.common import write_numpy_to_dense_tensor\n",
     "import io\n",
+    "import boto3\n",
     "\n",
     "# Convert the training data into the format required by the SageMaker KMeans algorithm\n",
     "buf = io.BytesIO()\n",
     "write_numpy_to_dense_tensor(buf, train_set[0], train_set[1])\n",
-    "buf.seek(0)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "%%time\n",
-    "\n",
-    "import boto3\n",
+    "buf.seek(0)\n",
     "\n",
-    "key = 'kmeans_lowlevel_example/data'\n",
-    "boto3.resource('s3').Bucket(bucket).Object(key).upload_fileobj(buf)\n",
-    "s3_train_data = 's3://{}/{}'.format(bucket, key)\n",
-    "print('uploaded training data location: {}'.format(s3_train_data))"
+    "boto3.resource('s3').Bucket(bucket).Object(data_key).upload_fileobj(buf)"
    ]
   },
   {
@@ -201,7 +202,7 @@
     "    },\n",
     "    \"RoleArn\": role,\n",
     "    \"OutputDataConfig\": {\n",
-    "        \"S3OutputPath\": \"s3://{}/kmeans_lowlevel_example/output\".format(bucket)\n",
+    "        \"S3OutputPath\": output_location\n",
     "    },\n",
     "    \"ResourceConfig\": {\n",
     "        \"InstanceCount\": 2,\n",
@@ -224,7 +225,7 @@
     "            \"DataSource\": {\n",
     "                \"S3DataSource\": {\n",
     "                    \"S3DataType\": \"S3Prefix\",\n",
-    "                    \"S3Uri\": s3_train_data,\n",
+    "                    \"S3Uri\": data_location,\n",
     "                    \"S3DataDistributionType\": \"FullyReplicated\"\n",
     "                }\n",
     "            },\n",