Update to ignite 0.4.2 APIs #29

Merged
1 commit merged on Sep 28, 2020
Changes from all commits
4 changes: 2 additions & 2 deletions 3d_classification/ignite/densenet_evaluation_array.py
@@ -51,8 +51,8 @@ def main():
# define nifti dataset
val_ds = NiftiDataset(image_files=images, labels=labels, transform=val_transforms, image_only=False)
# create DenseNet121
- net = monai.networks.nets.densenet.densenet121(spatial_dims=3, in_channels=1, out_channels=2)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ net = monai.networks.nets.densenet.densenet121(spatial_dims=3, in_channels=1, out_channels=2).to(device)

metric_name = "Accuracy"
# add evaluation metric to the evaluator engine
@@ -81,7 +81,7 @@ def prepare_batch(batch, device=None, non_blocking=False):
prediction_saver.attach(evaluator)

# the model was trained by "densenet_training_array" example
CheckpointLoader(load_path="./runs_array/net_checkpoint_20.pth", load_dict={"net": net}).attach(evaluator)
CheckpointLoader(load_path="./runs_array/net_checkpoint_20.pt", load_dict={"net": net}).attach(evaluator)

# create a validation data loader
val_loader = DataLoader(val_ds, batch_size=2, num_workers=4, pin_memory=torch.cuda.is_available())
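Note on the recurring device change in this PR: the device is now resolved before the network is built, and the model is moved with an explicit .to(device). Under ignite 0.4.x, create_supervised_trainer / create_supervised_evaluator no longer move the model to the given device (0.3.0 did this implicitly), so the scripts have to do it themselves. A minimal sketch of the new pattern, with a toy linear layer standing in for the tutorial's DenseNet121:

import torch
from ignite.engine import create_supervised_trainer

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
net = torch.nn.Linear(10, 2).to(device)  # explicit move; ignite 0.4.x will not call net.to(device) for us
loss = torch.nn.CrossEntropyLoss()
opt = torch.optim.Adam(net.parameters(), 1e-5)

# the engine only moves each batch to `device`; the model must already live there
trainer = create_supervised_trainer(net, opt, loss, device=device)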
4 changes: 2 additions & 2 deletions 3d_classification/ignite/densenet_evaluation_dict.py
@@ -58,8 +58,8 @@ def main():
)

# create DenseNet121
- net = monai.networks.nets.densenet.densenet121(spatial_dims=3, in_channels=1, out_channels=2)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ net = monai.networks.nets.densenet.densenet121(spatial_dims=3, in_channels=1, out_channels=2).to(device)

def prepare_batch(batch, device=None, non_blocking=False):
return _prepare_batch((batch["img"], batch["label"]), device, non_blocking)
@@ -88,7 +88,7 @@ def prepare_batch(batch, device=None, non_blocking=False):
prediction_saver.attach(evaluator)

# the model was trained by "densenet_training_dict" example
CheckpointLoader(load_path="./runs_dict/net_checkpoint_20.pth", load_dict={"net": net}).attach(evaluator)
CheckpointLoader(load_path="./runs_dict/net_checkpoint_20.pt", load_dict={"net": net}).attach(evaluator)

# create a validation data loader
val_ds = monai.data.Dataset(data=val_files, transform=val_transforms)
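The load paths above change from .pth to .pt because these checkpoints are written by ignite's ModelCheckpoint handler, and the Checkpoint/DiskSaver machinery in ignite 0.4.x saves files with a .pt suffix (0.3.0 produced .pth). A rough sketch of how the save and load sides pair up, assuming the same ./runs_dict prefix as in the corresponding training script:

import torch
from ignite.handlers import ModelCheckpoint
from monai.handlers import CheckpointLoader

net = torch.nn.Linear(10, 2)  # stand-in for the tutorial's DenseNet121
opt = torch.optim.Adam(net.parameters(), 1e-5)

# training side: under ignite 0.4.x this writes e.g. ./runs_dict/net_checkpoint_20.pt
# (the "checkpoint" part of the name appears when more than one object is passed in to_save)
checkpoint_handler = ModelCheckpoint("./runs_dict/", "net", n_saved=10, require_empty=False)
# trainer.add_event_handler(Events.EPOCH_COMPLETED, checkpoint_handler, {"net": net, "opt": opt})

# evaluation side: the load path has to match the new .pt suffix
loader = CheckpointLoader(load_path="./runs_dict/net_checkpoint_20.pt", load_dict={"net": net})
# loader.attach(evaluator) as in the script above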
4 changes: 2 additions & 2 deletions 3d_classification/ignite/densenet_training_array.py
@@ -68,11 +68,11 @@ def main():
print(type(im), im.shape, label)

# create DenseNet121, CrossEntropyLoss and Adam optimizer
- net = monai.networks.nets.densenet.densenet121(spatial_dims=3, in_channels=1, out_channels=2)
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ net = monai.networks.nets.densenet.densenet121(spatial_dims=3, in_channels=1, out_channels=2).to(device)
loss = torch.nn.CrossEntropyLoss()
lr = 1e-5
opt = torch.optim.Adam(net.parameters(), lr)
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Ignite trainer expects batch=(img, label) and returns output=loss at every iteration,
# user can add output_transform to return other values, like: y_pred, y, etc.
4 changes: 2 additions & 2 deletions 3d_classification/ignite/densenet_training_dict.py
@@ -86,11 +86,11 @@ def main():
print(check_data["img"].shape, check_data["label"])

# create DenseNet121, CrossEntropyLoss and Adam optimizer
- net = monai.networks.nets.densenet.densenet121(spatial_dims=3, in_channels=1, out_channels=2)
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ net = monai.networks.nets.densenet.densenet121(spatial_dims=3, in_channels=1, out_channels=2).to(device)
loss = torch.nn.CrossEntropyLoss()
lr = 1e-5
opt = torch.optim.Adam(net.parameters(), lr)
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Ignite trainer expects batch=(img, label) and returns output=loss at every iteration,
# user can add output_transform to return other values, like: y_pred, y, etc.
5 changes: 2 additions & 3 deletions 3d_segmentation/ignite/unet_evaluation_array.py
@@ -60,8 +60,7 @@ def main(tempdir):
channels=(16, 32, 64, 128, 256),
strides=(2, 2, 2, 2),
num_res_units=2,
- )
- net.to(device)
+ ).to(device)

# define sliding window size and batch size for windows inference
roi_size = (96, 96, 96)
@@ -99,7 +98,7 @@ def _sliding_window_processor(engine, batch):
file_saver.attach(evaluator)

# the model was trained by "unet_training_array" example
- ckpt_saver = CheckpointLoader(load_path="./runs_array/net_checkpoint_100.pth", load_dict={"net": net})
+ ckpt_saver = CheckpointLoader(load_path="./runs_array/net_checkpoint_100.pt", load_dict={"net": net})
ckpt_saver.attach(evaluator)

# sliding window inference for one image at every iteration
5 changes: 2 additions & 3 deletions 3d_segmentation/ignite/unet_evaluation_dict.py
@@ -67,8 +67,7 @@ def main(tempdir):
channels=(16, 32, 64, 128, 256),
strides=(2, 2, 2, 2),
num_res_units=2,
- )
- net.to(device)
+ ).to(device)

# define sliding window size and batch size for windows inference
roi_size = (96, 96, 96)
@@ -104,7 +103,7 @@ def _sliding_window_processor(engine, batch):
output_transform=lambda output: predict_segmentation(output[0]),
).attach(evaluator)
# the model was trained by "unet_training_dict" example
CheckpointLoader(load_path="./runs_dict/net_checkpoint_50.pth", load_dict={"net": net}).attach(evaluator)
CheckpointLoader(load_path="./runs_dict/net_checkpoint_50.pt", load_dict={"net": net}).attach(evaluator)

# sliding window inference for one image at every iteration
val_loader = DataLoader(
4 changes: 2 additions & 2 deletions 3d_segmentation/ignite/unet_training_array.py
@@ -75,18 +75,18 @@ def main(tempdir):
val_loader = DataLoader(val_ds, batch_size=5, num_workers=8, pin_memory=torch.cuda.is_available())

# create UNet, DiceLoss and Adam optimizer
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
net = monai.networks.nets.UNet(
dimensions=3,
in_channels=1,
out_channels=1,
channels=(16, 32, 64, 128, 256),
strides=(2, 2, 2, 2),
num_res_units=2,
- )
+ ).to(device)
loss = monai.losses.DiceLoss(sigmoid=True)
lr = 1e-3
opt = torch.optim.Adam(net.parameters(), lr)
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Ignite trainer expects batch=(img, seg) and returns output=loss at every iteration,
# user can add output_transform to return other values, like: y_pred, y, etc.
4 changes: 2 additions & 2 deletions 3d_segmentation/ignite/unet_training_dict.py
@@ -112,18 +112,18 @@ def main(tempdir):
)

# create UNet, DiceLoss and Adam optimizer
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
net = monai.networks.nets.UNet(
dimensions=3,
in_channels=1,
out_channels=1,
channels=(16, 32, 64, 128, 256),
strides=(2, 2, 2, 2),
num_res_units=2,
- )
+ ).to(device)
loss = monai.losses.DiceLoss(sigmoid=True)
lr = 1e-3
opt = torch.optim.Adam(net.parameters(), lr)
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Ignite trainer expects batch=(img, seg) and returns output=loss at every iteration,
# user can add output_transform to return other values, like: y_pred, y, etc.
8 changes: 4 additions & 4 deletions 3d_segmentation/unet_segmentation_3d_ignite.ipynb
@@ -223,7 +223,7 @@
" ]\n",
")\n",
"\n",
"# Define nifti dataset, dataloader.\n",
"# Define nifti dataset, dataloader\n",
"ds = ArrayDataset(images, imtrans, segs, segtrans)\n",
"loader = torch.utils.data.DataLoader(\n",
" ds, batch_size=10, num_workers=2, pin_memory=torch.cuda.is_available()\n",
@@ -245,15 +245,16 @@
"metadata": {},
"outputs": [],
"source": [
"# Create UNet, DiceLoss and Adam optimizer.\n",
"# Create UNet, DiceLoss and Adam optimizer\n",
"device = torch.device(\"cuda:0\")\n",
"net = UNet(\n",
" dimensions=3,\n",
" in_channels=1,\n",
" out_channels=1,\n",
" channels=(16, 32, 64, 128, 256),\n",
" strides=(2, 2, 2, 2),\n",
" num_res_units=2,\n",
")\n",
").to(device)\n",
"\n",
"loss = DiceLoss(sigmoid=True)\n",
"lr = 1e-3\n",
@@ -274,7 +275,6 @@
"outputs": [],
"source": [
"# Create trainer\n",
"device = torch.device(\"cuda:0\")\n",
"trainer = ignite.engine.create_supervised_trainer(net, opt, loss, device, False)"
]
},
3 changes: 2 additions & 1 deletion acceleration/distributed_training/unet_evaluation_ddp.py
@@ -107,6 +107,7 @@ def evaluate(args):

# create UNet, DiceLoss and Adam optimizer
device = torch.device(f"cuda:{args.local_rank}")
+ torch.cuda.set_device(device)
model = monai.networks.nets.UNet(
dimensions=3,
in_channels=1,
@@ -116,7 +117,7 @@ def evaluate(args):
num_res_units=2,
).to(device)
# wrap the model with DistributedDataParallel module
- model = DistributedDataParallel(model, device_ids=[args.local_rank])
+ model = DistributedDataParallel(model, device_ids=[device])
# config mapping to expected GPU device
map_location = {"cuda:0": f"cuda:{args.local_rank}"}
# load model parameters to GPU device
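The distributed scripts now pin each worker to its GPU with torch.cuda.set_device(device) right after resolving the local rank, and pass the torch.device itself to DistributedDataParallel via device_ids (PyTorch accepts either a device object or an integer index there). A minimal per-process sketch, assuming the script is launched with torch.distributed.launch as in the tutorials; the linear layer stands in for the MONAI UNet:

import torch
import torch.distributed as dist
from torch.nn.parallel import DistributedDataParallel


def create_ddp_model(local_rank: int) -> torch.nn.Module:
    # env:// init reads MASTER_ADDR/MASTER_PORT, RANK and WORLD_SIZE set by the launcher
    dist.init_process_group(backend="nccl", init_method="env://")
    device = torch.device(f"cuda:{local_rank}")
    # pin this process to its GPU before the model or any CUDA tensors are created
    torch.cuda.set_device(device)
    model = torch.nn.Linear(16, 2).to(device)  # stand-in for monai.networks.nets.UNet
    # device_ids takes the single target device (or its index) for this process
    return DistributedDataParallel(model, device_ids=[device])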
Original file line number Diff line number Diff line change
@@ -115,6 +115,7 @@ def evaluate(args):

# create UNet, DiceLoss and Adam optimizer
device = torch.device(f"cuda:{hvd.local_rank()}")
+ torch.cuda.set_device(device)
model = monai.networks.nets.UNet(
dimensions=3,
in_channels=1,
Original file line number Diff line number Diff line change
@@ -32,7 +32,7 @@
- Wrap Dataset with `DistributedSampler`, disable the `shuffle` in sampler and DataLoader.
- Add `StatsHandler` and `SegmentationSaver` to the master process which is `dist.get_rank() == 0`.
- ignite can automatically reduce metrics for distributed evaluation, refer to:
- https://github.com/pytorch/ignite/blob/v0.3.0/ignite/metrics/metric.py#L85
+ https://github.com/pytorch/ignite/blob/v0.4.2/ignite/metrics/metric.py#L507

Note:
`torch.distributed.launch` will launch `nnodes * nproc_per_node = world_size` processes in total.
@@ -121,6 +121,7 @@ def evaluate(args):

# create UNet, DiceLoss and Adam optimizer
device = torch.device(f"cuda:{args.local_rank}")
+ torch.cuda.set_device(device)
net = monai.networks.nets.UNet(
dimensions=3,
in_channels=1,
@@ -130,7 +131,7 @@
num_res_units=2,
).to(device)
# wrap the model with DistributedDataParallel module
- net = DistributedDataParallel(net, device_ids=[args.local_rank])
+ net = DistributedDataParallel(net, device_ids=[device])

val_post_transforms = Compose(
[
@@ -141,7 +142,7 @@
)
val_handlers = [
CheckpointLoader(
load_path="./runs/checkpoint_epoch=4.pth",
load_path="./runs/checkpoint_epoch=4.pt",
load_dict={"net": net},
# config mapping to expected GPU device
map_location={"cuda:0": f"cuda:{args.local_rank}"},
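The docstring link above now points at the ignite 0.4.2 Metric base class, where metric results are synchronized across processes. In 0.4.x the built-in metrics all-reduce their internal counters automatically once torch.distributed is initialized, so the evaluation workflow does not need any manual reduction. A rough per-rank sketch under that assumption:

import torch
from ignite.engine import create_supervised_evaluator
from ignite.metrics import Accuracy


def build_evaluator(model: torch.nn.Module, device: torch.device):
    # each rank evaluates its own shard (served by a DistributedSampler);
    # Accuracy gathers the per-rank counts across the process group before dividing
    return create_supervised_evaluator(model, metrics={"Accuracy": Accuracy()}, device=device)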
3 changes: 2 additions & 1 deletion acceleration/distributed_training/unet_training_ddp.py
@@ -127,6 +127,7 @@ def train(args):

# create UNet, DiceLoss and Adam optimizer
device = torch.device(f"cuda:{args.local_rank}")
+ torch.cuda.set_device(device)
model = monai.networks.nets.UNet(
dimensions=3,
in_channels=1,
@@ -138,7 +139,7 @@
loss_function = monai.losses.DiceLoss(sigmoid=True).to(device)
optimizer = torch.optim.Adam(model.parameters(), 1e-3)
# wrap the model with DistributedDataParallel module
- model = DistributedDataParallel(model, device_ids=[args.local_rank])
+ model = DistributedDataParallel(model, device_ids=[device])

# start a typical PyTorch training
epoch_loss_values = list()
1 change: 1 addition & 0 deletions acceleration/distributed_training/unet_training_horovod.py
@@ -132,6 +132,7 @@ def train(args):

# create UNet, DiceLoss and Adam optimizer
device = torch.device(f"cuda:{hvd.local_rank()}")
+ torch.cuda.set_device(device)
model = monai.networks.nets.UNet(
dimensions=3,
in_channels=1,
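The Horovod script gets the same GPU pinning, with the local rank coming from hvd.local_rank() instead of a launcher argument. A tiny per-worker sketch, assuming Horovod is installed and the script is started with horovodrun:

import horovod.torch as hvd
import torch

hvd.init()
# each worker pins itself to its local GPU before building the model
device = torch.device(f"cuda:{hvd.local_rank()}")
torch.cuda.set_device(device)
model = torch.nn.Linear(16, 2).to(device)  # stand-in for the tutorial's UNet
# the optimizer would then be wrapped with hvd.DistributedOptimizer(...) as in the full script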
Original file line number Diff line number Diff line change
@@ -187,6 +187,7 @@ def train(args):

# create UNet, DiceLoss and Adam optimizer
device = torch.device(f"cuda:{args.local_rank}")
+ torch.cuda.set_device(device)
model = monai.networks.nets.UNet(
dimensions=3,
in_channels=1,
@@ -198,7 +199,7 @@ def train(args):
loss_function = monai.losses.DiceLoss(sigmoid=True).to(device)
optimizer = torch.optim.Adam(model.parameters(), 1e-3)
# wrap the model with DistributedDataParallel module
- model = DistributedDataParallel(model, device_ids=[args.local_rank])
+ model = DistributedDataParallel(model, device_ids=[device])

# start a typical PyTorch training
epoch_loss_values = list()
7 changes: 4 additions & 3 deletions acceleration/distributed_training/unet_training_workflows.py
@@ -32,7 +32,7 @@
Instead, `SupervisedTrainer` shuffles data by `train_sampler.set_epoch(epoch)` before every epoch.
- Add `StatsHandler` and `CheckpointHandler` to the master process which is `dist.get_rank() == 0`.
- ignite can automatically reduce metrics for distributed training, refer to:
- https://github.com/pytorch/ignite/blob/v0.3.0/ignite/metrics/metric.py#L85
+ https://github.com/pytorch/ignite/blob/v0.4.2/ignite/metrics/metric.py#L507

Note:
`torch.distributed.launch` will launch `nnodes * nproc_per_node = world_size` processes in total.
@@ -134,6 +134,7 @@ def train(args):

# create UNet, DiceLoss and Adam optimizer
device = torch.device(f"cuda:{args.local_rank}")
+ torch.cuda.set_device(device)
net = monai.networks.nets.UNet(
dimensions=3,
in_channels=1,
@@ -142,11 +143,11 @@ def train(args):
strides=(2, 2, 2, 2),
num_res_units=2,
).to(device)
- loss = monai.losses.DiceLoss(sigmoid=True).to(device)
+ loss = monai.losses.DiceLoss(sigmoid=True)
opt = torch.optim.Adam(net.parameters(), 1e-3)
lr_scheduler = torch.optim.lr_scheduler.StepLR(opt, step_size=2, gamma=0.1)
# wrap the model with DistributedDataParallel module
- net = DistributedDataParallel(net, device_ids=[args.local_rank])
+ net = DistributedDataParallel(net, device_ids=[device])

train_post_transforms = Compose(
[
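One more small change above: the .to(device) call on DiceLoss is dropped. The loss is a module without parameters or buffers, so moving it to a device is a no-op; it simply computes on whatever device its input tensors live on. A quick illustrative check (not from the tutorial):

import torch
from monai.losses import DiceLoss

loss = DiceLoss(sigmoid=True)  # stateless module: nothing for .to(device) to move
pred = torch.rand(2, 1, 16, 16, 16)                       # (batch, channel, D, H, W)
target = torch.randint(0, 2, (2, 1, 16, 16, 16)).float()
print(loss(pred, target))                                 # works the same on CPU or GPU tensors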