
Commit 3bd6ecb

Revert "Fixed notebooks for errors due to syntax change and cleaned notebooks (#1723)" (#1730)
This reverts commit e691349.
1 parent e691349 commit 3bd6ecb

1 file changed

introduction_to_amazon_algorithms/ntm_synthetic/ntm_synthetic.ipynb

Lines changed: 97 additions & 73 deletions
@@ -57,22 +57,23 @@
    "cell_type": "code",
    "execution_count": null,
    "metadata": {
+    "collapsed": true,
     "isConfigCell": true,
     "tags": [
      "parameters"
     ]
    },
    "outputs": [],
    "source": [
-    "# Define IAM role\n",
-    "import sagemaker\n",
-    "import boto3\n",
-    "import re\n",
-    "from sagemaker import get_execution_role\n",
+    "# Define IAM role\n",
+    "import sagemaker\n",
+    "import boto3\n",
+    "import re\n",
+    "from sagemaker import get_execution_role\n",
+    "\n",
+    "sess = sagemaker.Session()\n",
+    "bucket=sess.default_bucket()",
     "\n",
-    "sess = sagemaker.Session()\n",
-    "bucket = sess.default_bucket()\n",
-    "prefix = \"ntm_demo\"\n",
     "role = get_execution_role()"
    ]
   },
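For orientation, the restored cell is the notebook's setup step. A minimal sketch of what it runs as plain Python follows; note that `prefix`, which later hunks still reference (for example in the S3 upload path), is no longer assigned in this cell after the revert, so it is assumed to be defined elsewhere in the v1 notebook:

    # Setup restored by the revert (SageMaker Python SDK v1 era)
    import sagemaker
    import boto3
    import re
    from sagemaker import get_execution_role

    sess = sagemaker.Session()
    bucket = sess.default_bucket()
    # `prefix` is used by later cells (S3 upload, output_path) but is not set
    # here after the revert; assumed to be defined elsewhere in the notebook.
    role = get_execution_role()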
@@ -86,7 +87,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "import numpy as np\n",
@@ -101,8 +104,7 @@
     "from IPython.display import display\n",
     "import scipy\n",
     "import sagemaker.amazon.common as smac\n",
-    "from sagemaker.serializers import CSVSerializer\n",
-    "from sagemaker.deserializers import JSONDeserializer"
+    "from sagemaker.predictor import csv_serializer, json_deserializer"
    ]
   },
   {
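This hunk carries the substance of the revert: `sagemaker.serializers` and `sagemaker.deserializers` are SageMaker Python SDK v2 modules, while `csv_serializer` and `json_deserializer` are the SDK v1 objects exported from `sagemaker.predictor`. A side-by-side sketch of the two import styles (only the v1 form is part of the restored notebook):

    # SDK v1 (restored by this revert)
    from sagemaker.predictor import csv_serializer, json_deserializer

    # SDK v2 (the code being reverted away from)
    # from sagemaker.serializers import CSVSerializer
    # from sagemaker.deserializers import JSONDeserializer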
@@ -118,19 +120,20 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "# generate the sample data\n",
     "num_documents = 5000\n",
     "num_topics = 5\n",
     "vocabulary_size = 25\n",
     "known_alpha, known_beta, documents, topic_mixtures = generate_griffiths_data(\n",
-    "    num_documents=num_documents, num_topics=num_topics, vocabulary_size=vocabulary_size\n",
-    ")\n",
+    "    num_documents=num_documents, num_topics=num_topics, vocabulary_size=vocabulary_size)\n",
     "\n",
     "# separate the generated data into training and tests subsets\n",
-    "num_documents_training = int(0.8 * num_documents)\n",
+    "num_documents_training = int(0.8*num_documents)\n",
     "num_documents_test = num_documents - num_documents_training\n",
     "\n",
     "documents_training = documents[:num_documents_training]\n",
@@ -157,23 +160,27 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
-    "print(f\"First training document = {documents[0]}\")\n",
-    "print(f\"\\nVocabulary size = {vocabulary_size}\")"
+    "print('First training document = {}'.format(documents[0]))\n",
+    "print('\\nVocabulary size = {}'.format(vocabulary_size))"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "np.set_printoptions(precision=4, suppress=True)\n",
     "\n",
-    "print(f\"Known topic mixture of first training document = {topic_mixtures_training[0]}\")\n",
-    "print(f\"\\nNumber of topics = {num_topics}\")"
+    "print('Known topic mixture of first training document = {}'.format(topic_mixtures_training[0]))\n",
+    "print('\\nNumber of topics = {}'.format(num_topics))"
    ]
   },
   {
@@ -186,13 +193,15 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "%matplotlib inline\n",
     "\n",
-    "fig = plot_topic_data(documents_training[:10], nrows=2, ncols=5, cmap=\"gray_r\", with_colorbar=False)\n",
-    "fig.suptitle(\"Example Documents\")\n",
+    "fig = plot_topic_data(documents_training[:10], nrows=2, ncols=5, cmap='gray_r', with_colorbar=False)\n",
+    "fig.suptitle('Example Documents')\n",
     "fig.set_dpi(160)"
    ]
   },
@@ -210,16 +219,18 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "buf = io.BytesIO()\n",
-    "smac.write_numpy_to_dense_tensor(buf, data_training[0].astype(\"float32\"))\n",
+    "smac.write_numpy_to_dense_tensor(buf, data_training[0].astype('float32'))\n",
     "buf.seek(0)\n",
     "\n",
-    "key = \"ntm.data\"\n",
-    "boto3.resource(\"s3\").Bucket(bucket).Object(os.path.join(prefix, \"train\", key)).upload_fileobj(buf)\n",
-    "s3_train_data = f\"s3://{bucket}/{prefix}/train/{key}\""
+    "key = 'ntm.data'\n",
+    "boto3.resource('s3').Bucket(bucket).Object(os.path.join(prefix, 'train', key)).upload_fileobj(buf)\n",
+    "s3_train_data = 's3://{}/{}/train/{}'.format(bucket, prefix, key)"
    ]
   },
   {
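Both sides of this hunk serialize the training matrix with `smac.write_numpy_to_dense_tensor`, which writes the array as RecordIO-wrapped protobuf, the format the built-in NTM algorithm trains on; only the quote style changes. A minimal self-contained sketch of the serialization step (the bucket and key names below are placeholders, not values from this diff):

    import io
    import os
    import boto3
    import numpy as np
    import sagemaker.amazon.common as smac

    data = np.random.rand(100, 25).astype('float32')  # placeholder corpus
    buf = io.BytesIO()
    smac.write_numpy_to_dense_tensor(buf, data)       # RecordIO-wrapped protobuf
    buf.seek(0)
    boto3.resource('s3').Bucket('my-bucket').Object(
        os.path.join('ntm_demo', 'train', 'ntm.data')).upload_fileobj(buf)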
@@ -236,12 +247,13 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "from sagemaker.amazon.amazon_estimator import get_image_uri\n",
-    "\n",
-    "container = get_image_uri(boto3.Session().region_name, \"ntm\")"
+    "container = get_image_uri(boto3.Session().region_name, 'ntm')"
    ]
   },
   {
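`get_image_uri` is the SDK v1 helper for looking up a built-in algorithm's container image; SDK v2 replaced it with `sagemaker.image_uris.retrieve`. A sketch of the restored v1 call, with the v2 equivalent noted as an assumption (it does not appear anywhere in this diff):

    import boto3
    from sagemaker.amazon.amazon_estimator import get_image_uri

    # v1, as restored here
    container = get_image_uri(boto3.Session().region_name, 'ntm')

    # v2 equivalent (assumption, not part of this diff):
    # from sagemaker import image_uris
    # container = image_uris.retrieve('ntm', boto3.Session().region_name)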
@@ -260,22 +272,23 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "sess = sagemaker.Session()\n",
     "\n",
-    "ntm = sagemaker.estimator.Estimator(\n",
-    "    container,\n",
-    "    role,\n",
-    "    train_instance_count=1,\n",
-    "    train_instance_type=\"ml.c4.xlarge\",\n",
-    "    output_path=f\"s3://{bucket}/{prefix}/output\",\n",
-    "    sagemaker_session=sess,\n",
-    ")\n",
-    "ntm.set_hyperparameters(num_topics=num_topics, feature_dim=vocabulary_size)\n",
+    "ntm = sagemaker.estimator.Estimator(container,\n",
+    "                                    role, \n",
+    "                                    train_instance_count=1, \n",
+    "                                    train_instance_type='ml.c4.xlarge',\n",
+    "                                    output_path='s3://{}/{}/output'.format(bucket, prefix),\n",
+    "                                    sagemaker_session=sess)\n",
+    "ntm.set_hyperparameters(num_topics=num_topics,\n",
+    "                        feature_dim=vocabulary_size)\n",
     "\n",
-    "ntm.fit({\"train\": s3_train_data})"
+    "ntm.fit({'train': s3_train_data})"
    ]
   },
   {
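Note that both sides of this hunk use the SDK v1 parameter names `train_instance_count` and `train_instance_type`; the diff only swaps black-style formatting and f-strings for hanging indentation and `.format()`. For reference, SDK v2 renamed these parameters, so a v2 port (an assumption, not shown in this diff) would look roughly like:

    # Hypothetical SDK v2 form of the same estimator (not part of this diff)
    ntm = sagemaker.estimator.Estimator(
        container,
        role,
        instance_count=1,              # was train_instance_count in v1
        instance_type='ml.c4.xlarge',  # was train_instance_type in v1
        output_path='s3://{}/{}/output'.format(bucket, prefix),
        sagemaker_session=sess,
    )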
@@ -294,10 +307,13 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
-    "ntm_predictor = ntm.deploy(initial_instance_count=1, instance_type=\"ml.m4.xlarge\")"
+    "ntm_predictor = ntm.deploy(initial_instance_count=1,\n",
+    "                           instance_type='ml.m4.xlarge')"
    ]
   },
   {
@@ -322,11 +338,14 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
-    "ntm_predictor.serializer = CSVSerializer()\n",
-    "ntm_predictor.deserializer = JSONDeserializer()"
+    "ntm_predictor.content_type = 'text/csv'\n",
+    "ntm_predictor.serializer = csv_serializer\n",
+    "ntm_predictor.deserializer = json_deserializer"
    ]
   },
   {
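The restored v1 pattern drives the endpoint's wire format through predictor attributes, including an explicit `content_type` string. In SDK v2 the serializer instance carries the content type itself, which is why the v2 code on the left sets no `content_type` and instead passes one per call (see the `initial_args` change in the next hunk). A sketch of the two patterns, with the v2 names taken from the v2 API rather than from this diff:

    # SDK v1 (restored): attribute-based configuration
    ntm_predictor.content_type = 'text/csv'
    ntm_predictor.serializer = csv_serializer
    ntm_predictor.deserializer = json_deserializer
    results = ntm_predictor.predict(documents_training[:10])

    # SDK v2 (reverted away from): serializer objects own the content type
    # ntm_predictor.serializer = CSVSerializer()
    # ntm_predictor.deserializer = JSONDeserializer()
    # results = ntm_predictor.predict(documents_training[:10],
    #                                 initial_args={'ContentType': 'text/csv'})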
@@ -339,10 +358,12 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
-    "results = ntm_predictor.predict(documents_training[:10], initial_args={\"ContentType\": \"text/csv\"})\n",
+    "results = ntm_predictor.predict(documents_training[:10])\n",
     "print(results)"
    ]
   },
@@ -369,10 +390,12 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
-    "predictions = np.array([prediction[\"topic_weights\"] for prediction in results[\"predictions\"]])\n",
+    "predictions = np.array([prediction['topic_weights'] for prediction in results['predictions']])\n",
     "\n",
     "print(predictions)"
    ]
@@ -387,7 +410,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "print(topic_mixtures_training[0]) # known topic mixture\n",
@@ -406,22 +431,26 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "def predict_batches(data, rows=1000):\n",
     "    split_array = np.array_split(data, int(data.shape[0] / float(rows) + 1))\n",
     "    predictions = []\n",
     "    for array in split_array:\n",
-    "        results = ntm_predictor.predict(array, initial_args={\"ContentType\": \"text/csv\"})\n",
-    "        predictions += [r[\"topic_weights\"] for r in results[\"predictions\"]]\n",
+    "        results = ntm_predictor.predict(array)\n",
+    "        predictions += [r['topic_weights'] for r in results['predictions']]\n",
     "    return np.array(predictions)"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "predictions = predict_batches(documents_training)"
@@ -437,17 +466,15 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
-    "data = pd.DataFrame(\n",
-    "    np.concatenate([topic_mixtures_training, predictions], axis=1),\n",
-    "    columns=[f\"actual_{i}\" for i in range(5)] + [f\"predictions_{i}\" for i in range(5)],\n",
-    ")\n",
+    "data = pd.DataFrame(np.concatenate([topic_mixtures_training, predictions], axis=1), \n",
+    "                    columns=['actual_{}'.format(i) for i in range(5)] + ['predictions_{}'.format(i) for i in range(5)])\n",
     "display(data.corr())\n",
-    "pd.plotting.scatter_matrix(\n",
-    "    pd.DataFrame(np.concatenate([topic_mixtures_training, predictions], axis=1)), figsize=(12, 12)\n",
-    ")\n",
+    "pd.plotting.scatter_matrix(pd.DataFrame(np.concatenate([topic_mixtures_training, predictions], axis=1)), figsize=(12, 12))\n",
     "plt.show()"
    ]
   },
@@ -478,9 +505,7 @@
    "cell_type": "code",
    "execution_count": null,
    "metadata": {
-    "jupyter": {
-     "source_hidden": true
-    }
+    "collapsed": true
    },
    "outputs": [],
    "source": [
@@ -501,11 +526,10 @@
  ],
  "metadata": {
   "celltoolbar": "Tags",
-  "instance_type": "ml.t3.medium",
   "kernelspec": {
-   "display_name": "Python 3 (Data Science)",
+   "display_name": "Environment (conda_python3)",
    "language": "python",
-   "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-west-2:236514542706:image/datascience-1.0"
+   "name": "conda_python3"
   },
   "language_info": {
    "codemirror_mode": {
@@ -517,10 +541,10 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.6"
+   "version": "3.6.3"
   },
   "notice": "Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the \"License\"). You may not use this file except in compliance with the License. A copy of the License is located at http://aws.amazon.com/apache2.0/ or in the \"license\" file accompanying this file. This file is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License."
  },
  "nbformat": 4,
- "nbformat_minor": 4
+ "nbformat_minor": 2
 }
