Skip to content

Commit dc5dc7c

Browse files
authored
Fixing Debugger BYOC example to use pySDK v1 (#1567)
* publish BYOC with Debugger notebook * some test change * revert the kernel names in the metadata * fix typos * incorporate feedback * incorporate comments * pin to pysdk v1 * remove installation output logs Co-authored-by: Miyoung Choi <[email protected]>
1 parent 09b92ef commit dc5dc7c

File tree

1 file changed

+120
-131
lines changed

1 file changed

+120
-131
lines changed

sagemaker-debugger/build_your_own_container_with_debugger/debugger_byoc.ipynb

Lines changed: 120 additions & 131 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@
5757
"outputs": [],
5858
"source": [
5959
"import sys\n",
60-
"!{sys.executable} -m pip install \"sagemaker>=2.0.0\" smdebug"
60+
"!{sys.executable} -m pip install \"sagemaker==1.72.0\" smdebug"
6161
]
6262
},
6363
{
@@ -78,20 +78,9 @@
7878
},
7979
{
8080
"cell_type": "code",
81-
"execution_count": 329,
81+
"execution_count": null,
8282
"metadata": {},
83-
"outputs": [
84-
{
85-
"data": {
86-
"text/plain": [
87-
"'2.5.0'"
88-
]
89-
},
90-
"execution_count": 329,
91-
"metadata": {},
92-
"output_type": "execute_result"
93-
}
94-
],
83+
"outputs": [],
9584
"source": [
9685
"import sagemaker\n",
9786
"sagemaker.__version__"
@@ -117,7 +106,7 @@
117106
},
118107
{
119108
"cell_type": "code",
120-
"execution_count": 2,
109+
"execution_count": 3,
121110
"metadata": {
122111
"scrolled": true
123112
},
@@ -126,22 +115,22 @@
126115
"name": "stdout",
127116
"output_type": "stream",
128117
"text": [
129-
"FROM tensorflow/tensorflow:2.2.0rc2-py3-jupyter\n",
130-
"\n",
131-
"# Install Amazon SageMaker Python SDK liabrary for training and smdebug\n",
132-
"RUN pip install sagemaker-training\n",
133-
"RUN pip install smdebug\n",
134-
"\n",
135-
"# Copies the training code inside the container\n",
136-
"COPY tf_keras_resnet_byoc.py /opt/ml/code/tf_keras_resnet_byoc.py\n",
137-
"\n",
138-
"# Defines train.py as script entrypoint\n",
139-
"ENV SAGEMAKER_PROGRAM tf_keras_resnet_byoc.py"
118+
"\u001b[34mFROM\u001b[39;49;00m \u001b[33mtensorflow/tensorflow:2.2.0rc2-py3-jupyter\u001b[39;49;00m\r\n",
119+
"\r\n",
120+
"\u001b[37m# Install Amazon SageMaker training toolkit and smdebug libraries\u001b[39;49;00m\r\n",
121+
"\u001b[34mRUN\u001b[39;49;00m pip install sagemaker-training\r\n",
122+
"\u001b[34mRUN\u001b[39;49;00m pip install smdebug\r\n",
123+
"\r\n",
124+
"\u001b[37m# Copies the training code inside the container\u001b[39;49;00m\r\n",
125+
"\u001b[34mCOPY\u001b[39;49;00m tf_keras_resnet_byoc.py /opt/ml/code/tf_keras_resnet_byoc.py\r\n",
126+
"\r\n",
127+
"\u001b[37m# Defines train.py as script entrypoint\u001b[39;49;00m\r\n",
128+
"\u001b[34mENV\u001b[39;49;00m SAGEMAKER_PROGRAM tf_keras_resnet_byoc.py\r\n"
140129
]
141130
}
142131
],
143132
"source": [
144-
"! cat docker/Dockerfile"
133+
"! pygmentize docker/Dockerfile"
145134
]
146135
},
147136
{
@@ -168,108 +157,108 @@
168157
"name": "stdout",
169158
"output_type": "stream",
170159
"text": [
171-
"\"\"\"\n",
172-
"This script is a ResNet training script which uses Tensorflow's Keras interface, and provides an example of how to use SageMaker Debugger when you use your own custom container in SageMaker or your own script outside SageMaker.\n",
173-
"It has been orchestrated with SageMaker Debugger hooks to allow saving tensors during training.\n",
174-
"These hooks have been instrumented to read from a JSON configuration that SageMaker puts in the training container.\n",
175-
"Configuration provided to the SageMaker python SDK when creating a job will be passed on to the hook.\n",
176-
"This allows you to use the same script with different configurations across different runs.\n",
177-
"\n",
178-
"If you use an official SageMaker Framework container (i.e. AWS Deep Learning Container), you do not have to orchestrate your script as below. Hooks are automatically added in those environments. This experience is called a \"zero script change\". For more information, see https://github.com/awslabs/sagemaker-debugger/blob/master/docs/sagemaker.md#zero-script-change. An example of the same is provided at https://github.com/awslabs/amazon-sagemaker-examples/sagemaker-debugger/tensorflow2/tensorflow2_zero_code_change.\n",
179-
"\"\"\"\n",
180-
"\n",
181-
"# Standard Library\n",
182-
"import argparse\n",
183-
"import random\n",
184-
"\n",
185-
"# Third Party\n",
186-
"import numpy as np\n",
187-
"import tensorflow.compat.v2 as tf\n",
188-
"from tensorflow.keras.applications.resnet50 import ResNet50\n",
189-
"from tensorflow.keras.datasets import cifar10\n",
190-
"from tensorflow.keras.utils import to_categorical\n",
191-
"\n",
192-
"# smdebug modification: Import smdebug support for Tensorflow\n",
193-
"import smdebug.tensorflow as smd\n",
194-
"\n",
195-
"\n",
196-
"def train(batch_size, epoch, model, hook):\n",
197-
" (X_train, y_train), (X_valid, y_valid) = cifar10.load_data()\n",
198-
"\n",
199-
" Y_train = to_categorical(y_train, 10)\n",
200-
" Y_valid = to_categorical(y_valid, 10)\n",
201-
"\n",
202-
" X_train = X_train.astype('float32')\n",
203-
" X_valid = X_valid.astype('float32')\n",
204-
"\n",
205-
" mean_image = np.mean(X_train, axis=0)\n",
206-
" X_train -= mean_image\n",
207-
" X_valid -= mean_image\n",
208-
" X_train /= 128.\n",
209-
" X_valid /= 128.\n",
210-
" \n",
211-
" # register hook to save the following scalar values\n",
212-
" hook.save_scalar(\"epoch\", epoch)\n",
213-
" hook.save_scalar(\"batch_size\", batch_size)\n",
214-
" hook.save_scalar(\"train_steps_per_epoch\", len(X_train)/batch_size)\n",
215-
" hook.save_scalar(\"valid_steps_per_epoch\", len(X_valid)/batch_size)\n",
216-
" \n",
217-
" model.fit(X_train, Y_train,\n",
218-
" batch_size=batch_size,\n",
219-
" epochs=epoch,\n",
220-
" validation_data=(X_valid, Y_valid),\n",
221-
" shuffle=False,\n",
222-
" # smdebug modification: Pass the hook as a Keras callback\n",
223-
" callbacks=[hook])\n",
224-
"\n",
225-
"\n",
226-
"def main():\n",
227-
" parser = argparse.ArgumentParser(description=\"Train resnet50 cifar10\")\n",
228-
" parser.add_argument(\"--batch_size\", type=int, default=50)\n",
229-
" parser.add_argument(\"--epoch\", type=int, default=15)\n",
230-
" parser.add_argument(\"--model_dir\", type=str, default=\"./model_keras_resnet\")\n",
231-
" parser.add_argument(\"--lr\", type=float, default=0.001)\n",
232-
" parser.add_argument(\"--random_seed\", type=bool, default=False)\n",
233-
" \n",
234-
" args = parser.parse_args()\n",
235-
"\n",
236-
" if args.random_seed:\n",
237-
" tf.random.set_seed(2)\n",
238-
" np.random.seed(2)\n",
239-
" random.seed(12)\n",
240-
"\n",
241-
" \n",
242-
" mirrored_strategy = tf.distribute.MirroredStrategy()\n",
243-
" with mirrored_strategy.scope():\n",
244-
" \n",
245-
" model = ResNet50(weights=None, input_shape=(32,32,3), classes=10)\n",
246-
"\n",
247-
" # smdebug modification:\n",
248-
" # Create hook from the configuration provided through sagemaker python sdk.\n",
249-
" # This configuration is provided in the form of a JSON file.\n",
250-
" # Default JSON configuration file:\n",
251-
" # {\n",
252-
" # \"LocalPath\": <path on device where tensors will be saved>\n",
253-
" # }\"\n",
254-
" # Alternatively, you could pass custom debugger configuration (using DebuggerHookConfig)\n",
255-
" # through SageMaker Estimator. For more information, https://github.com/aws/sagemaker-python-sdk/blob/master/doc/amazon_sagemaker_debugger.rst\n",
256-
" hook = smd.KerasHook.create_from_json_file()\n",
257-
"\n",
258-
" opt = tf.keras.optimizers.Adam(learning_rate=args.lr)\n",
259-
" model.compile(loss='categorical_crossentropy',\n",
260-
" optimizer=opt,\n",
261-
" metrics=['accuracy'])\n",
262-
"\n",
263-
" # start the training.\n",
264-
" train(args.batch_size, args.epoch, model, hook)\n",
265-
"\n",
266-
"if __name__ == \"__main__\":\n",
267-
" main()\n"
160+
"\u001b[33m\"\"\"\u001b[39;49;00m\r\n",
161+
"\u001b[33mThis script is a ResNet training script which uses Tensorflow's Keras interface, and provides an example of how to use SageMaker Debugger when you use your own custom container in SageMaker or your own script outside SageMaker.\u001b[39;49;00m\r\n",
162+
"\u001b[33mIt has been orchestrated with SageMaker Debugger hooks to allow saving tensors during training.\u001b[39;49;00m\r\n",
163+
"\u001b[33mThese hooks have been instrumented to read from a JSON configuration that SageMaker puts in the training container.\u001b[39;49;00m\r\n",
164+
"\u001b[33mConfiguration provided to the SageMaker python SDK when creating a job will be passed on to the hook.\u001b[39;49;00m\r\n",
165+
"\u001b[33mThis allows you to use the same script with different configurations across different runs.\u001b[39;49;00m\r\n",
166+
"\u001b[33m\u001b[39;49;00m\r\n",
167+
"\u001b[33mIf you use an official SageMaker Framework container (i.e. AWS Deep Learning Container), you do not have to orchestrate your script as below. Hooks are automatically added in those environments. This experience is called a \"zero script change\". For more information, see https://github.com/awslabs/sagemaker-debugger/blob/master/docs/sagemaker.md#zero-script-change. An example of the same is provided at https://github.com/awslabs/amazon-sagemaker-examples/sagemaker-debugger/tensorflow2/tensorflow2_zero_code_change.\u001b[39;49;00m\r\n",
168+
"\u001b[33m\"\"\"\u001b[39;49;00m\r\n",
169+
"\r\n",
170+
"\u001b[37m# Standard Library\u001b[39;49;00m\r\n",
171+
"\u001b[34mimport\u001b[39;49;00m \u001b[04m\u001b[36margparse\u001b[39;49;00m\r\n",
172+
"\u001b[34mimport\u001b[39;49;00m \u001b[04m\u001b[36mrandom\u001b[39;49;00m\r\n",
173+
"\r\n",
174+
"\u001b[37m# Third Party\u001b[39;49;00m\r\n",
175+
"\u001b[34mimport\u001b[39;49;00m \u001b[04m\u001b[36mnumpy\u001b[39;49;00m \u001b[34mas\u001b[39;49;00m \u001b[04m\u001b[36mnp\u001b[39;49;00m\r\n",
176+
"\u001b[34mimport\u001b[39;49;00m \u001b[04m\u001b[36mtensorflow\u001b[39;49;00m\u001b[04m\u001b[36m.\u001b[39;49;00m\u001b[04m\u001b[36mcompat\u001b[39;49;00m\u001b[04m\u001b[36m.\u001b[39;49;00m\u001b[04m\u001b[36mv2\u001b[39;49;00m \u001b[34mas\u001b[39;49;00m \u001b[04m\u001b[36mtf\u001b[39;49;00m\r\n",
177+
"\u001b[34mfrom\u001b[39;49;00m \u001b[04m\u001b[36mtensorflow\u001b[39;49;00m\u001b[04m\u001b[36m.\u001b[39;49;00m\u001b[04m\u001b[36mkeras\u001b[39;49;00m\u001b[04m\u001b[36m.\u001b[39;49;00m\u001b[04m\u001b[36mapplications\u001b[39;49;00m\u001b[04m\u001b[36m.\u001b[39;49;00m\u001b[04m\u001b[36mresnet50\u001b[39;49;00m \u001b[34mimport\u001b[39;49;00m ResNet50\r\n",
178+
"\u001b[34mfrom\u001b[39;49;00m \u001b[04m\u001b[36mtensorflow\u001b[39;49;00m\u001b[04m\u001b[36m.\u001b[39;49;00m\u001b[04m\u001b[36mkeras\u001b[39;49;00m\u001b[04m\u001b[36m.\u001b[39;49;00m\u001b[04m\u001b[36mdatasets\u001b[39;49;00m \u001b[34mimport\u001b[39;49;00m cifar10\r\n",
179+
"\u001b[34mfrom\u001b[39;49;00m \u001b[04m\u001b[36mtensorflow\u001b[39;49;00m\u001b[04m\u001b[36m.\u001b[39;49;00m\u001b[04m\u001b[36mkeras\u001b[39;49;00m\u001b[04m\u001b[36m.\u001b[39;49;00m\u001b[04m\u001b[36mutils\u001b[39;49;00m \u001b[34mimport\u001b[39;49;00m to_categorical\r\n",
180+
"\r\n",
181+
"\u001b[37m# smdebug modification: Import smdebug support for Tensorflow\u001b[39;49;00m\r\n",
182+
"\u001b[34mimport\u001b[39;49;00m \u001b[04m\u001b[36msmdebug\u001b[39;49;00m\u001b[04m\u001b[36m.\u001b[39;49;00m\u001b[04m\u001b[36mtensorflow\u001b[39;49;00m \u001b[34mas\u001b[39;49;00m \u001b[04m\u001b[36msmd\u001b[39;49;00m\r\n",
183+
"\r\n",
184+
"\r\n",
185+
"\u001b[34mdef\u001b[39;49;00m \u001b[32mtrain\u001b[39;49;00m(batch_size, epoch, model, hook):\r\n",
186+
" (X_train, y_train), (X_valid, y_valid) = cifar10.load_data()\r\n",
187+
"\r\n",
188+
" Y_train = to_categorical(y_train, \u001b[34m10\u001b[39;49;00m)\r\n",
189+
" Y_valid = to_categorical(y_valid, \u001b[34m10\u001b[39;49;00m)\r\n",
190+
"\r\n",
191+
" X_train = X_train.astype(\u001b[33m'\u001b[39;49;00m\u001b[33mfloat32\u001b[39;49;00m\u001b[33m'\u001b[39;49;00m)\r\n",
192+
" X_valid = X_valid.astype(\u001b[33m'\u001b[39;49;00m\u001b[33mfloat32\u001b[39;49;00m\u001b[33m'\u001b[39;49;00m)\r\n",
193+
"\r\n",
194+
" mean_image = np.mean(X_train, axis=\u001b[34m0\u001b[39;49;00m)\r\n",
195+
" X_train -= mean_image\r\n",
196+
" X_valid -= mean_image\r\n",
197+
" X_train /= \u001b[34m128.\u001b[39;49;00m\r\n",
198+
" X_valid /= \u001b[34m128.\u001b[39;49;00m\r\n",
199+
" \r\n",
200+
" \u001b[37m# register hook to save the following scalar values\u001b[39;49;00m\r\n",
201+
" hook.save_scalar(\u001b[33m\"\u001b[39;49;00m\u001b[33mepoch\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m, epoch)\r\n",
202+
" hook.save_scalar(\u001b[33m\"\u001b[39;49;00m\u001b[33mbatch_size\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m, batch_size)\r\n",
203+
" hook.save_scalar(\u001b[33m\"\u001b[39;49;00m\u001b[33mtrain_steps_per_epoch\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m, \u001b[36mlen\u001b[39;49;00m(X_train)/batch_size)\r\n",
204+
" hook.save_scalar(\u001b[33m\"\u001b[39;49;00m\u001b[33mvalid_steps_per_epoch\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m, \u001b[36mlen\u001b[39;49;00m(X_valid)/batch_size)\r\n",
205+
" \r\n",
206+
" model.fit(X_train, Y_train,\r\n",
207+
" batch_size=batch_size,\r\n",
208+
" epochs=epoch,\r\n",
209+
" validation_data=(X_valid, Y_valid),\r\n",
210+
" shuffle=\u001b[34mFalse\u001b[39;49;00m,\r\n",
211+
" \u001b[37m# smdebug modification: Pass the hook as a Keras callback\u001b[39;49;00m\r\n",
212+
" callbacks=[hook])\r\n",
213+
"\r\n",
214+
"\r\n",
215+
"\u001b[34mdef\u001b[39;49;00m \u001b[32mmain\u001b[39;49;00m():\r\n",
216+
" parser = argparse.ArgumentParser(description=\u001b[33m\"\u001b[39;49;00m\u001b[33mTrain resnet50 cifar10\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m)\r\n",
217+
" parser.add_argument(\u001b[33m\"\u001b[39;49;00m\u001b[33m--batch_size\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m, \u001b[36mtype\u001b[39;49;00m=\u001b[36mint\u001b[39;49;00m, default=\u001b[34m50\u001b[39;49;00m)\r\n",
218+
" parser.add_argument(\u001b[33m\"\u001b[39;49;00m\u001b[33m--epoch\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m, \u001b[36mtype\u001b[39;49;00m=\u001b[36mint\u001b[39;49;00m, default=\u001b[34m15\u001b[39;49;00m)\r\n",
219+
" parser.add_argument(\u001b[33m\"\u001b[39;49;00m\u001b[33m--model_dir\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m, \u001b[36mtype\u001b[39;49;00m=\u001b[36mstr\u001b[39;49;00m, default=\u001b[33m\"\u001b[39;49;00m\u001b[33m./model_keras_resnet\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m)\r\n",
220+
" parser.add_argument(\u001b[33m\"\u001b[39;49;00m\u001b[33m--lr\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m, \u001b[36mtype\u001b[39;49;00m=\u001b[36mfloat\u001b[39;49;00m, default=\u001b[34m0.001\u001b[39;49;00m)\r\n",
221+
" parser.add_argument(\u001b[33m\"\u001b[39;49;00m\u001b[33m--random_seed\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m, \u001b[36mtype\u001b[39;49;00m=\u001b[36mbool\u001b[39;49;00m, default=\u001b[34mFalse\u001b[39;49;00m)\r\n",
222+
" \r\n",
223+
" args = parser.parse_args()\r\n",
224+
"\r\n",
225+
" \u001b[34mif\u001b[39;49;00m args.random_seed:\r\n",
226+
" tf.random.set_seed(\u001b[34m2\u001b[39;49;00m)\r\n",
227+
" np.random.seed(\u001b[34m2\u001b[39;49;00m)\r\n",
228+
" random.seed(\u001b[34m12\u001b[39;49;00m)\r\n",
229+
"\r\n",
230+
" \r\n",
231+
" mirrored_strategy = tf.distribute.MirroredStrategy()\r\n",
232+
" \u001b[34mwith\u001b[39;49;00m mirrored_strategy.scope():\r\n",
233+
" \r\n",
234+
" model = ResNet50(weights=\u001b[34mNone\u001b[39;49;00m, input_shape=(\u001b[34m32\u001b[39;49;00m,\u001b[34m32\u001b[39;49;00m,\u001b[34m3\u001b[39;49;00m), classes=\u001b[34m10\u001b[39;49;00m)\r\n",
235+
"\r\n",
236+
" \u001b[37m# smdebug modification:\u001b[39;49;00m\r\n",
237+
" \u001b[37m# Create hook from the configuration provided through sagemaker python sdk.\u001b[39;49;00m\r\n",
238+
" \u001b[37m# This configuration is provided in the form of a JSON file.\u001b[39;49;00m\r\n",
239+
" \u001b[37m# Default JSON configuration file:\u001b[39;49;00m\r\n",
240+
" \u001b[37m# {\u001b[39;49;00m\r\n",
241+
" \u001b[37m# \"LocalPath\": <path on device where tensors will be saved>\u001b[39;49;00m\r\n",
242+
" \u001b[37m# }\"\u001b[39;49;00m\r\n",
243+
" \u001b[37m# Alternatively, you could pass custom debugger configuration (using DebuggerHookConfig)\u001b[39;49;00m\r\n",
244+
" \u001b[37m# through SageMaker Estimator. For more information, https://github.com/aws/sagemaker-python-sdk/blob/master/doc/amazon_sagemaker_debugger.rst\u001b[39;49;00m\r\n",
245+
" hook = smd.KerasHook.create_from_json_file()\r\n",
246+
"\r\n",
247+
" opt = tf.keras.optimizers.Adam(learning_rate=args.lr)\r\n",
248+
" model.compile(loss=\u001b[33m'\u001b[39;49;00m\u001b[33mcategorical_crossentropy\u001b[39;49;00m\u001b[33m'\u001b[39;49;00m,\r\n",
249+
" optimizer=opt,\r\n",
250+
" metrics=[\u001b[33m'\u001b[39;49;00m\u001b[33maccuracy\u001b[39;49;00m\u001b[33m'\u001b[39;49;00m])\r\n",
251+
"\r\n",
252+
" \u001b[37m# start the training.\u001b[39;49;00m\r\n",
253+
" train(args.batch_size, args.epoch, model, hook)\r\n",
254+
"\r\n",
255+
"\u001b[34mif\u001b[39;49;00m \u001b[31m__name__\u001b[39;49;00m == \u001b[33m\"\u001b[39;49;00m\u001b[33m__main__\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m:\r\n",
256+
" main()\r\n"
268257
]
269258
}
270259
],
271260
"source": [
272-
"! cat docker/tf_keras_resnet_byoc.py"
261+
"! pygmentize docker/tf_keras_resnet_byoc.py"
273262
]
274263
},
275264
{
@@ -451,7 +440,7 @@
451440
"\n",
452441
"Construct a SageMaker Estimator using the image URI of the custom training container you created in **Step 3**.\n",
453442
"\n",
454-
"**Note:** This example uses the SageMaker Python SDK v2. If you want to use the SageMaker Python SDK v1, you need to change the parameter names. You can find the SageMaker Estimator parameters at [Get Started with Custom Training Containers](https://docs.aws.amazon.com/sagemaker/latest/dg/build-container-to-train-script-get-started.html#byoc-training-step5) in the AWS SageMaker Developer Guide or at [the SageMaker Estimator API](https://sagemaker.readthedocs.io/en/stable/api/training/estimators.html) in one of the older version of SageMaker Python SDK documentation."
443+
"**Note:** This example uses the SageMaker Python SDK v1. If you want to use the SageMaker Python SDK v2, you need to change the parameter names. You can find the SageMaker Estimator parameters at [Get Started with Custom Training Containers](https://docs.aws.amazon.com/sagemaker/latest/dg/build-container-to-train-script-get-started.html#byoc-training-step5) in the AWS SageMaker Developer Guide or at [the SageMaker Estimator API](https://sagemaker.readthedocs.io/en/stable/api/training/estimators.html) in one of the older versions of the SageMaker Python SDK documentation."
455444
]
456445
},
457446
{
@@ -462,15 +451,14 @@
462451
"source": [
463452
"from sagemaker.estimator import Estimator\n",
464453
"from sagemaker import get_execution_role\n",
465-
"from sagemaker.tensorflow import TensorFlow\n",
466454
"\n",
467455
"role = get_execution_role()\n",
468456
"\n",
469457
"estimator = Estimator(\n",
470-
" image_uri=byoc_image_uri,\n",
458+
" image_name=byoc_image_uri,\n",
471459
" role=role,\n",
472-
" instance_count=1,\n",
473-
" instance_type=\"ml.p3.16xlarge\",\n",
460+
" train_instance_count=1,\n",
461+
" train_instance_type=\"ml.p3.16xlarge\",\n",
474462
"\n",
475463
" # Debugger-specific parameters\n",
476464
" rules = rules,\n",
@@ -612,6 +600,8 @@
612600
"metadata": {},
613601
"outputs": [],
614602
"source": [
603+
"from smdebug.core.modes import ModeKeys\n",
604+
"\n",
615605
"len(trial.tensor('loss').steps(mode=ModeKeys.TRAIN))"
616606
]
617607
},
@@ -672,7 +662,6 @@
672662
"source": [
673663
"import matplotlib.pyplot as plt\n",
674664
"import numpy as np\n",
675-
"from smdebug.core.modes import ModeKeys\n",
676665
"\n",
677666
"# Retrieve the loss tensors collected in training mode\n",
678667
"y = []\n",

0 commit comments

Comments
 (0)