Project-MONAI · wyli · Apr 29, 2022 · Apr 25, 2022 · Apr 26, 2022 · Apr 26, 2022
diff --git a/modules/bundles/get_started.ipynb b/modules/bundles/get_started.ipynb
@@ -164,8 +164,6 @@
     "        \"$import os\",\n",
     "        \"$import ignite\"\n",
     "    ],\n",
-    "    \"determinism\": \"$monai.utils.set_determinism(seed=123)\",\n",
-    "    \"cudnn_opt\": \"$setattr(torch.backends.cudnn, 'benchmark', True)\",\n",
     "    \"device\": \"$torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')\",\n",
     "    \"ckpt_path\": \"/workspace/data/models/model.pt\",\n",
     "    \"dataset_dir\": \"/workspace/data/Task09_Spleen\",\n",
@@ -513,14 +511,29 @@
     "\n",
     "There are several predefined scripts in MONAI bundle module to help execute `regular training`, `metadata verification based on schema`, `network input / output verification`, `export to TorchScript model`, etc.\n",
     "\n",
-    "Here we leverage the `run` script and specify the ID of trainer in the config."
+    "Here we leverage the `run` script and specify the ID of trainer in the config.\n",
+    "\n",
+    "Define the entry-point expressions in the config as a list to be executed in order, then pass that list's ID (the `runner_id`) to the `run` command on the CLI."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "```json\n",
+    "\"training\": [\n",
+    "    \"$monai.utils.set_determinism(seed=123)\",\n",
+    "    \"$setattr(torch.backends.cudnn, 'benchmark', True)\",\n",
+    "    \"$@train#trainer.run()\"\n",
+    "]\n",
+    "```"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "`python -m monai.bundle run \"'train#trainer'\" --config_file configs/train.json`"
+    "`python -m monai.bundle run training --config_file configs/train.json`"
    ]
   },
   {
@@ -538,7 +551,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "`python -m monai.bundle run \"'train#trainer'\" --config_file configs/train.json --device \"\\$torch.device('cuda:1')\"`"
+    "`python -m monai.bundle run training --config_file configs/train.json --device \"\\$torch.device('cuda:1')\"`"
    ]
   },
   {
@@ -552,7 +565,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "`python -m monai.bundle run \"'train#trainer'\" --config_file configs/train.json --network \"%configs/test.json#network\"`"
+    "`python -m monai.bundle run training --config_file configs/train.json --network \"%configs/test.json#network\"`"
    ]
   },
   {

diff --git a/modules/bundles/spleen_segmentation/configs/evaluate.json b/modules/bundles/spleen_segmentation/configs/evaluate.json
@@ -0,0 +1,58 @@
+{
+    "validate#postprocessing":{
+        "_target_": "Compose",
+        "transforms": [
+            {
+                "_target_": "Activationsd",
+                "keys": "pred",
+                "softmax": true
+            },
+            {
+                "_target_": "Invertd",
+                "keys": ["pred", "label"],
+                "transform": "@validate#preprocessing",
+                "orig_keys": "image",
+                "meta_key_postfix": "meta_dict",
+                "nearest_interp": [false, true],
+                "to_tensor": true
+            },
+            {
+              "_target_": "AsDiscreted",
+                "keys": ["pred", "label"],
+                "argmax": [true, false],
+                "to_onehot": 2
+            },
+            {
+              "_target_": "SaveImaged",
+                "keys": "pred",
+                "meta_keys": "pred_meta_dict",
+                "output_dir": "@output_dir",
+                "resample": false,
+                "squeeze_end_dims": true
+            }
+        ]
+    },
+    "validate#handlers": [
+        {
+            "_target_": "CheckpointLoader",
+            "load_path": "$@ckpt_dir + '/model.pt'",
+            "load_dict": {"model": "@network"}
+        },
+        {
+            "_target_": "StatsHandler",
+            "iteration_log": false
+        },
+        {
+            "_target_": "MetricsSaver",
+            "save_dir": "@output_dir",
+            "metrics": ["val_mean_dice", "val_acc"],
+            "metric_details": ["val_mean_dice"],
+            "batch_transform": "$monai.handlers.from_engine(['image_meta_dict'])",
+            "summary_ops": "*"
+        }
+      ],
+    "evaluating": [
+        "$setattr(torch.backends.cudnn, 'benchmark', True)",
+        "$@validate#evaluator.run()"
+    ]
+}
diff --git a/modules/bundles/spleen_segmentation/configs/inference.json b/modules/bundles/spleen_segmentation/configs/inference.json
@@ -3,12 +3,11 @@
         "$import glob",
         "$import os"
     ],
-    "cudnn_opt": "$setattr(torch.backends.cudnn, 'benchmark', True)",
-    "device": "$torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')",
-    "ckpt_path": "/workspace/data/tutorials/modules/bundles/spleen_segmentation/models/model.pt",
-    "download_ckpt": "$monai.apps.utils.download_url('https://huggingface.co/MONAI/example_spleen_segmentation/resolve/main/model.pt', @ckpt_path)",
+    "bundle_root": "/workspace/data/tutorials/modules/bundles/spleen_segmentation",
+    "output_dir": "$@bundle_root + '/eval'",
     "dataset_dir": "/workspace/data/Task09_Spleen",
     "datalist": "$list(sorted(glob.glob(@dataset_dir + '/imagesTs/*.nii.gz')))",
+    "device": "$torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')",
     "network_def": {
         "_target_": "UNet",
         "spatial_dims": 3,
@@ -101,16 +100,14 @@
                 "_target_": "SaveImaged",
                 "keys": "pred",
                 "meta_keys": "pred_meta_dict",
-                "output_dir": "eval"
+                "output_dir": "@output_dir"
             }
         ]
     },
     "handlers": [
         {
             "_target_": "CheckpointLoader",
-            "_requires_": "@download_ckpt",
-            "_disabled_": "$not os.path.exists(@ckpt_path)",
-            "load_path": "@ckpt_path",
+            "load_path": "$@bundle_root + '/models/model.pt'",
             "load_dict": {"model": "@network"}
         },
         {
@@ -120,13 +117,16 @@
     ],
     "evaluator": {
         "_target_": "SupervisedEvaluator",
-        "_requires_": "@cudnn_opt",
         "device": "@device",
         "val_data_loader": "@dataloader",
         "network": "@network",
         "inferer": "@inferer",
         "postprocessing": "@postprocessing",
         "val_handlers": "@handlers",
         "amp": true
-    }
+    },
+    "evaluating": [
+        "$setattr(torch.backends.cudnn, 'benchmark', True)",
+        "$@evaluator.run()"
+    ]
 }
diff --git a/modules/bundles/spleen_segmentation/configs/multi_gpu_train.json b/modules/bundles/spleen_segmentation/configs/multi_gpu_train.json
@@ -0,0 +1,34 @@
+{
+    "device": "$torch.device(f'cuda:{dist.get_rank()}')",
+    "network": {
+        "_target_": "torch.nn.parallel.DistributedDataParallel",
+        "module": "$@network_def.to(@device)",
+        "device_ids": ["@device"]
+    },
+    "train#sampler": {
+        "_target_": "DistributedSampler",
+        "dataset": "@train#dataset",
+        "even_divisible": true,
+        "shuffle": true
+    },
+    "train#dataloader#sampler": "@train#sampler",
+    "train#dataloader#shuffle": false,
+    "train#trainer#train_handlers": "$@train#handlers[: 1 if dist.get_rank() > 0 else None]",
+    "validate#sampler": {
+        "_target_": "DistributedSampler",
+        "dataset": "@validate#dataset",
+        "even_divisible": false,
+        "shuffle": false
+    },
+    "validate#dataloader#sampler": "@validate#sampler",
+    "validate#evaluator#val_handlers": "$None if dist.get_rank() > 0 else @validate#handlers",
+    "training": [
+        "$import torch.distributed as dist",
+        "$dist.init_process_group(backend='nccl')",
+        "$torch.cuda.set_device(@device)",
+        "$monai.utils.set_determinism(seed=123)",
+        "$setattr(torch.backends.cudnn, 'benchmark', True)",
+        "$@train#trainer.run()",
+        "$dist.destroy_process_group()"
+    ]
+}
diff --git a/modules/bundles/spleen_segmentation/configs/train.json b/modules/bundles/spleen_segmentation/configs/train.json
@@ -4,13 +4,13 @@
         "$import os",
         "$import ignite"
     ],
-    "determinism": "$monai.utils.set_determinism(seed=123)",
-    "cudnn_opt": "$setattr(torch.backends.cudnn, 'benchmark', True)",
-    "device": "$torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')",
-    "ckpt_dir": "/workspace/data/tutorials/modules/bundles/spleen_segmentation/models",
+    "bundle_root": "/workspace/data/tutorials/modules/bundles/spleen_segmentation",
+    "ckpt_dir": "$@bundle_root + '/models'",
+    "output_dir": "$@bundle_root + '/eval'",
     "dataset_dir": "/workspace/data/Task09_Spleen",
     "images": "$list(sorted(glob.glob(@dataset_dir + '/imagesTr/*.nii.gz')))",
     "labels": "$list(sorted(glob.glob(@dataset_dir + '/labelsTr/*.nii.gz')))",
+    "device": "$torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')",
     "network_def": {
         "_target_": "UNet",
         "spatial_dims": 3,
@@ -94,7 +94,7 @@
             "_target_": "DataLoader",
             "dataset": "@train#dataset",
             "batch_size": 2,
-            "shuffle": false,
+            "shuffle": true,
             "num_workers": 4
         },
         "inferer": {
@@ -130,7 +130,7 @@
             },
             {
                 "_target_": "TensorBoardStatsHandler",
-                "log_dir": "eval",
+                "log_dir": "@output_dir",
                 "tag_name": "train_loss",
                 "output_transform": "$monai.handlers.from_engine(['loss'], first=True)"
             }
@@ -143,7 +143,6 @@
         },
         "trainer": {
             "_target_": "SupervisedTrainer",
-            "_requires_": ["@determinism", "@cudnn_opt"],
             "max_epochs": 100,
             "device": "@device",
             "train_data_loader": "@train#dataloader",
@@ -196,7 +195,7 @@
             },
             {
                 "_target_": "TensorBoardStatsHandler",
-                "log_dir": "eval",
+                "log_dir": "@output_dir",
                 "iteration_log": false
             },
             {
@@ -232,5 +231,10 @@
             "val_handlers": "@validate#handlers",
             "amp": true
         }
-    }
+    },
+    "training": [
+        "$monai.utils.set_determinism(seed=123)",
+        "$setattr(torch.backends.cudnn, 'benchmark', True)",
+        "$@train#trainer.run()"
+    ]
 }
diff --git a/modules/bundles/spleen_segmentation/docs/README.md b/modules/bundles/spleen_segmentation/docs/README.md
@@ -26,13 +26,25 @@ Mean Dice = 0.96
 Execute training:
 
 ```
-python -m monai.bundle run "'train#trainer'" --meta_file configs/metadata.json --config_file configs/train.json --logging_file configs/logging.conf
+python -m monai.bundle run training --meta_file configs/metadata.json --config_file configs/train.json --logging_file configs/logging.conf
+```
+
+Override the `train` config to execute multi-GPU training:
+
+```
+torchrun --standalone --nnodes=1 --nproc_per_node=2 -m monai.bundle run training --meta_file configs/metadata.json --config_file "['configs/train.json','configs/multi_gpu_train.json']" --logging_file configs/logging.conf
+```
+
+Override the `train` config to execute evaluation with the trained model:
+
+```
+python -m monai.bundle run evaluating --meta_file configs/metadata.json --config_file "['configs/train.json','configs/evaluate.json']" --logging_file configs/logging.conf
 ```
 
 Execute inference:
 
 ```
-python -m monai.bundle run evaluator --meta_file configs/metadata.json --config_file configs/inference.json --logging_file configs/logging.conf
+python -m monai.bundle run evaluating --meta_file configs/metadata.json --config_file configs/inference.json --logging_file configs/logging.conf
 ```
 
 Verify the metadata format: