Skip to content

Commit 7b3830b

Browse files
Enhance Auto3DSeg hpo_nni notebook and minor fix on autorunner notebook (#1046)
- In hpo_nni notebook, highlight the user needs the nni package to run the tutorial - Minor fix and unify the way to download the dataset as most of other notebooks do - Fix auto_runner notebook issue from last merge - Unify the variable names as suggested in Project-MONAI/MONAI#5374 ### Checks <!--- Put an `x` in all the boxes that apply, and remove the not applicable items --> - [ ] Notebook runs automatically `./runner [-p <regex_pattern>]` Signed-off-by: Mingxin Zheng <[email protected]> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent f5854ea commit 7b3830b

File tree

2 files changed

+126
-277
lines changed

2 files changed

+126
-277
lines changed

auto3dseg/notebooks/auto_runner.ipynb

Lines changed: 53 additions & 136 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,10 @@
1010
"\n",
1111
"Specifically, it will show the features below:\n",
1212
"1. Use `AutoRunner` with an input config file `input.yaml` example\n",
13-
"2. How to prepare an `input.yaml`\n",
14-
"3. How to configure the input/ouput folders\n",
13+
"2. How to prepare the config file `input.yaml`\n",
14+
"3. How to configure the paths for inputs, outputs, and intermediate results\n",
1515
"4. How to set the internal parameters of **Auto3DSeg** components\n",
16-
"5. How to apply hyper parameter optimization\n",
16+
"5. How to use a third-party hyper parameter optimization (HPO) package with `AutoRunner`\n",
1717
"\n",
1818
"## Setup environment"
1919
]
@@ -36,18 +36,9 @@
3636
},
3737
{
3838
"cell_type": "code",
39-
"execution_count": 2,
40-
"metadata": {},
41-
"outputs": [
42-
{
43-
"name": "stderr",
44-
"output_type": "stream",
45-
"text": [
46-
"/opt/conda/lib/python3.8/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
47-
" from .autonotebook import tqdm as notebook_tqdm\n"
48-
]
49-
}
50-
],
39+
"execution_count": null,
40+
"metadata": {},
41+
"outputs": [],
5142
"source": [
5243
"import os\n",
5344
"import tempfile\n",
@@ -82,7 +73,7 @@
8273
"\n",
8374
"compressed_file = os.path.join(root_dir, msd_task + \".tar\")\n",
8475
"dataroot = os.path.join(root_dir, msd_task)\n",
85-
"if os.path.exists(dataroot):\n",
76+
"if not os.path.exists(dataroot):\n",
8677
" download_and_extract(resource, compressed_file, root_dir)\n",
8778
"\n",
8879
"datalist_file = os.path.join(\"..\", \"tasks\", \"msd\", msd_task, \"msd_\" + msd_task.lower() + \"_folds.json\")"
@@ -97,19 +88,19 @@
9788
},
9889
{
9990
"cell_type": "code",
100-
"execution_count": 4,
91+
"execution_count": null,
10192
"metadata": {},
10293
"outputs": [],
10394
"source": [
104-
"data_src_cfg = {\n",
95+
"input_cfg = {\n",
10596
" \"name\": msd_task, # optional, it is only for your own record\n",
10697
" \"task\": \"segmentation\", # optional, it is only for your own record\n",
10798
" \"modality\": \"MRI\", # required\n",
10899
" \"datalist\": datalist_file, # required\n",
109100
" \"dataroot\": dataroot, # required\n",
110101
"}\n",
111102
"input = './input.yaml'\n",
112-
"ConfigParser.export_config_file(data_src_cfg, input)"
103+
"ConfigParser.export_config_file(input_cfg, input)"
113104
]
114105
},
115106
{
@@ -154,7 +145,7 @@
154145
"metadata": {},
155146
"outputs": [],
156147
"source": [
157-
"runner = AutoRunner(input=data_src_cfg)\n",
148+
"runner = AutoRunner(input=input_cfg)\n",
158149
"# runner.run()"
159150
]
160151
},
@@ -169,21 +160,9 @@
169160
},
170161
{
171162
"cell_type": "code",
172-
"execution_count": 7,
173-
"metadata": {},
174-
"outputs": [
175-
{
176-
"name": "stdout",
177-
"output_type": "stream",
178-
"text": [
179-
"2022-10-18 08:11:37,812 - INFO - ./my_workspace does not exists. Creating...\n",
180-
"2022-10-18 08:11:37,813 - INFO - ./my_workspace created to save all results\n",
181-
"2022-10-18 08:11:37,815 - INFO - Loading ./input.yaml for AutoRunner and making a copy in /workspace/monai/tutorials-in-dev/auto3dseg/notebooks/my_workspace/input.yaml\n",
182-
"2022-10-18 08:11:37,818 - INFO - The output_dir is not specified. /workspace/monai/tutorials-in-dev/auto3dseg/notebooks/my_workspace/ensemble_output will be used to save ensemble predictions\n",
183-
"2022-10-18 08:11:37,819 - INFO - Directory /workspace/monai/tutorials-in-dev/auto3dseg/notebooks/my_workspace/ensemble_output is created to save ensemble predictions\n"
184-
]
185-
}
186-
],
163+
"execution_count": null,
164+
"metadata": {},
165+
"outputs": [],
187166
"source": [
188167
"runner = AutoRunner(work_dir='./my_workspace', input=input)\n",
189168
"# runner.run()"
@@ -203,19 +182,9 @@
203182
},
204183
{
205184
"cell_type": "code",
206-
"execution_count": 8,
207-
"metadata": {},
208-
"outputs": [
209-
{
210-
"name": "stdout",
211-
"output_type": "stream",
212-
"text": [
213-
"2022-10-18 08:11:37,936 - INFO - Work directory ./work_dir is used to save all results\n",
214-
"2022-10-18 08:11:37,938 - INFO - Loading ./input.yaml for AutoRunner and making a copy in /workspace/monai/tutorials-in-dev/auto3dseg/notebooks/work_dir/input.yaml\n",
215-
"2022-10-18 08:11:37,940 - INFO - The output_dir is not specified. /workspace/monai/tutorials-in-dev/auto3dseg/notebooks/work_dir/ensemble_output will be used to save ensemble predictions\n"
216-
]
217-
}
218-
],
185+
"execution_count": null,
186+
"metadata": {},
187+
"outputs": [],
219188
"source": [
220189
"# This will restart from scratch and not use any cached results\n",
221190
"runner = AutoRunner(input=input, not_use_cache=True)\n",
@@ -238,19 +207,9 @@
238207
},
239208
{
240209
"cell_type": "code",
241-
"execution_count": 9,
242-
"metadata": {},
243-
"outputs": [
244-
{
245-
"name": "stdout",
246-
"output_type": "stream",
247-
"text": [
248-
"2022-10-18 08:11:38,055 - INFO - Work directory ./work_dir is used to save all results\n",
249-
"2022-10-18 08:11:38,057 - INFO - Loading ./input.yaml for AutoRunner and making a copy in /workspace/monai/tutorials-in-dev/auto3dseg/notebooks/work_dir/input.yaml\n",
250-
"2022-10-18 08:11:38,061 - INFO - Directory ./output_dir is created to save ensemble predictions\n"
251-
]
252-
}
253-
],
210+
"execution_count": null,
211+
"metadata": {},
212+
"outputs": [],
254213
"source": [
255214
"runner = AutoRunner(input=input, output_dir='./output_dir')\n",
256215
"# runner.run()"
@@ -268,19 +227,9 @@
268227
},
269228
{
270229
"cell_type": "code",
271-
"execution_count": 10,
272-
"metadata": {},
273-
"outputs": [
274-
{
275-
"name": "stdout",
276-
"output_type": "stream",
277-
"text": [
278-
"2022-10-18 08:11:38,182 - INFO - Work directory ./work_dir is used to save all results\n",
279-
"2022-10-18 08:11:38,186 - INFO - Loading ./input.yaml for AutoRunner and making a copy in /workspace/monai/tutorials-in-dev/auto3dseg/notebooks/work_dir/input.yaml\n",
280-
"2022-10-18 08:11:38,194 - INFO - The output_dir is not specified. /workspace/monai/tutorials-in-dev/auto3dseg/notebooks/work_dir/ensemble_output will be used to save ensemble predictions\n"
281-
]
282-
}
283-
],
230+
"execution_count": null,
231+
"metadata": {},
232+
"outputs": [],
284233
"source": [
285234
"runner = AutoRunner(input=input)\n",
286235
"runner.set_num_fold(num_fold=2)\n",
@@ -302,13 +251,16 @@
302251
},
303252
{
304253
"cell_type": "code",
305-
"execution_count": 12,
254+
"execution_count": null,
306255
"metadata": {},
307256
"outputs": [],
308257
"source": [
309-
"max_epochs = 2000\n",
258+
"max_epochs = 2\n",
310259
"\n",
311-
"num_gpus = 1 if \"multigpu\" in data_src_cfg and not data_src_cfg[\"multigpu\"] else torch.cuda.device_count()\n",
260+
"# safeguard to ensure max_epochs is greater or equal to 2\n",
261+
"max_epochs = max(max_epochs, 2)\n",
262+
"\n",
263+
"num_gpus = 1 if \"multigpu\" in input_cfg and not input_cfg[\"multigpu\"] else torch.cuda.device_count()\n",
312264
"\n",
313265
"num_epoch = max_epochs\n",
314266
"num_images_per_batch = 2\n",
@@ -326,7 +278,7 @@
326278
"}\n",
327279
"runner = AutoRunner(input=input)\n",
328280
"runner.set_training_params(params=train_param)\n",
329-
"# runner.run()\n"
281+
"# runner.run()"
330282
]
331283
},
332284
{
@@ -340,19 +292,9 @@
340292
},
341293
{
342294
"cell_type": "code",
343-
"execution_count": 13,
344-
"metadata": {},
345-
"outputs": [
346-
{
347-
"name": "stdout",
348-
"output_type": "stream",
349-
"text": [
350-
"2022-10-18 08:11:38,613 - INFO - Work directory ./work_dir is used to save all results\n",
351-
"2022-10-18 08:11:38,615 - INFO - Loading ./input.yaml for AutoRunner and making a copy in /workspace/monai/tutorials-in-dev/auto3dseg/notebooks/work_dir/input.yaml\n",
352-
"2022-10-18 08:11:38,618 - INFO - The output_dir is not specified. /workspace/monai/tutorials-in-dev/auto3dseg/notebooks/work_dir/ensemble_output will be used to save ensemble predictions\n"
353-
]
354-
}
355-
],
295+
"execution_count": null,
296+
"metadata": {},
297+
"outputs": [],
356298
"source": [
357299
"runner = AutoRunner(input=input)\n",
358300
"runner.set_ensemble_method(ensemble_method_name=\"AlgoEnsembleBestByFold\")\n",
@@ -368,19 +310,9 @@
368310
},
369311
{
370312
"cell_type": "code",
371-
"execution_count": 14,
372-
"metadata": {},
373-
"outputs": [
374-
{
375-
"name": "stdout",
376-
"output_type": "stream",
377-
"text": [
378-
"2022-10-18 08:11:38,783 - INFO - Work directory ./work_dir is used to save all results\n",
379-
"2022-10-18 08:11:38,784 - INFO - Loading ./input.yaml for AutoRunner and making a copy in /workspace/monai/tutorials-in-dev/auto3dseg/notebooks/work_dir/input.yaml\n",
380-
"2022-10-18 08:11:38,786 - INFO - The output_dir is not specified. /workspace/monai/tutorials-in-dev/auto3dseg/notebooks/work_dir/ensemble_output will be used to save ensemble predictions\n"
381-
]
382-
}
383-
],
313+
"execution_count": null,
314+
"metadata": {},
315+
"outputs": [],
384316
"source": [
385317
"# set model ensemble method\n",
386318
"pred_params = {\n",
@@ -397,30 +329,25 @@
397329
"cell_type": "markdown",
398330
"metadata": {},
399331
"source": [
400-
"## Train model with HPO (NNI Grid-search)\n",
332+
"## Train model with HPO\n",
401333
"\n",
402334
"**Auto3DSeg** supports hyper parameter optimization (HPO) via `NNI` and `Optuna` backends.\n",
335+
"If you would like to use `Optuna`, please check the [notebook](hpo_optuna.ipynb) for detailed usage.\n",
336+
"\n",
337+
"Here we demonstrate the HPO option with `NNI` by Microsoft.\n",
338+
"Please install it via `pip install nni` if you want to run HPO with it in this tutorial and have not already done so at the beginning of the notebook.\n",
403339
"AutoRunner supports the `NNI` backend with a grid search method by automatically generating the `NNI` config and running `nnictl` commands in a subprocess.\n",
404340
"\n",
405-
"Note: to run the HPO, you need to ensure the development environment has `nni` package.\n",
406-
"Please refer to the [MONAI Installation Guide](https://docs.monai.io/en/stable/installation.html#installing-the-recommended-dependencies) for how to install the recommended dependencies."
341+
"## Use `AutoRunner` with `NNI` backend to perform grid-search\n",
342+
"\n",
343+
"After `runner.run()` is executed, `nni` will attempt to start a web service using port 8088 by default. If you are running the tutorial on a remote host, please make sure the port is available on the system."
407344
]
408345
},
409346
{
410347
"cell_type": "code",
411-
"execution_count": 15,
412-
"metadata": {},
413-
"outputs": [
414-
{
415-
"name": "stdout",
416-
"output_type": "stream",
417-
"text": [
418-
"2022-10-18 08:11:38,907 - INFO - Work directory ./work_dir is used to save all results\n",
419-
"2022-10-18 08:11:38,908 - INFO - Loading ./input.yaml for AutoRunner and making a copy in /workspace/monai/tutorials-in-dev/auto3dseg/notebooks/work_dir/input.yaml\n",
420-
"2022-10-18 08:11:38,910 - INFO - The output_dir is not specified. /workspace/monai/tutorials-in-dev/auto3dseg/notebooks/work_dir/ensemble_output will be used to save ensemble predictions\n"
421-
]
422-
}
423-
],
348+
"execution_count": null,
349+
"metadata": {},
350+
"outputs": [],
424351
"source": [
425352
"runner = AutoRunner(input=input, hpo=True)\n",
426353
"search_space = {\"learning_rate\": {\"_type\": \"choice\", \"_value\": [0.0001, 0.001, 0.01, 0.1]}}\n",
@@ -451,19 +378,9 @@
451378
},
452379
{
453380
"cell_type": "code",
454-
"execution_count": 16,
455-
"metadata": {},
456-
"outputs": [
457-
{
458-
"name": "stdout",
459-
"output_type": "stream",
460-
"text": [
461-
"2022-10-18 08:11:39,042 - INFO - Work directory ./work_dir is used to save all results\n",
462-
"2022-10-18 08:11:39,046 - INFO - Loading ./input.yaml for AutoRunner and making a copy in /workspace/monai/tutorials-in-dev/auto3dseg/notebooks/work_dir/input.yaml\n",
463-
"2022-10-18 08:11:39,054 - INFO - The output_dir is not specified. /workspace/monai/tutorials-in-dev/auto3dseg/notebooks/work_dir/ensemble_output will be used to save ensemble predictions\n"
464-
]
465-
}
466-
],
381+
"execution_count": null,
382+
"metadata": {},
383+
"outputs": [],
467384
"source": [
468385
"runner = AutoRunner(input=input, hpo=True)\n",
469386
"hpo_params = {\"maxTrialNumber\": 20}\n",
@@ -496,7 +413,7 @@
496413
],
497414
"metadata": {
498415
"kernelspec": {
499-
"display_name": "Python 3.8.10 64-bit",
416+
"display_name": "Python 3.8.13 ('base')",
500417
"language": "python",
501418
"name": "python3"
502419
},
@@ -510,12 +427,12 @@
510427
"name": "python",
511428
"nbconvert_exporter": "python",
512429
"pygments_lexer": "ipython3",
513-
"version": "3.8.10"
430+
"version": "3.8.13"
514431
},
515432
"orig_nbformat": 4,
516433
"vscode": {
517434
"interpreter": {
518-
"hash": "916dbcbb3f70747c44a77c7bcd40155683ae19c65e1c03b4aa3499c5328201f1"
435+
"hash": "d4d1e4263499bec80672ea0156c357c1ee493ec2b1c70f0acce89fc37c4a6abe"
519436
}
520437
}
521438
},

0 commit comments

Comments
 (0)