Skip to content

Commit fbfbce2

Browse files
authored
[DLMED] update to ignite 0.4.2 APIs (#29)
Signed-off-by: Nic Ma <[email protected]>
1 parent 678f4f8 commit fbfbce2

16 files changed

+36 -31 lines changed

3d_classification/ignite/densenet_evaluation_array.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -51,8 +51,8 @@ def main():
5151
# define nifti dataset
5252
val_ds = NiftiDataset(image_files=images, labels=labels, transform=val_transforms, image_only=False)
5353
# create DenseNet121
54-
net = monai.networks.nets.densenet.densenet121(spatial_dims=3, in_channels=1, out_channels=2)
5554
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
55+
net = monai.networks.nets.densenet.densenet121(spatial_dims=3, in_channels=1, out_channels=2).to(device)
5656

5757
metric_name = "Accuracy"
5858
# add evaluation metric to the evaluator engine
@@ -81,7 +81,7 @@ def prepare_batch(batch, device=None, non_blocking=False):
8181
prediction_saver.attach(evaluator)
8282

8383
# the model was trained by "densenet_training_array" example
84-
CheckpointLoader(load_path="./runs_array/net_checkpoint_20.pth", load_dict={"net": net}).attach(evaluator)
84+
CheckpointLoader(load_path="./runs_array/net_checkpoint_20.pt", load_dict={"net": net}).attach(evaluator)
8585

8686
# create a validation data loader
8787
val_loader = DataLoader(val_ds, batch_size=2, num_workers=4, pin_memory=torch.cuda.is_available())

3d_classification/ignite/densenet_evaluation_dict.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,8 +58,8 @@ def main():
5858
)
5959

6060
# create DenseNet121
61-
net = monai.networks.nets.densenet.densenet121(spatial_dims=3, in_channels=1, out_channels=2)
6261
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
62+
net = monai.networks.nets.densenet.densenet121(spatial_dims=3, in_channels=1, out_channels=2).to(device)
6363

6464
def prepare_batch(batch, device=None, non_blocking=False):
6565
return _prepare_batch((batch["img"], batch["label"]), device, non_blocking)
@@ -88,7 +88,7 @@ def prepare_batch(batch, device=None, non_blocking=False):
8888
prediction_saver.attach(evaluator)
8989

9090
# the model was trained by "densenet_training_dict" example
91-
CheckpointLoader(load_path="./runs_dict/net_checkpoint_20.pth", load_dict={"net": net}).attach(evaluator)
91+
CheckpointLoader(load_path="./runs_dict/net_checkpoint_20.pt", load_dict={"net": net}).attach(evaluator)
9292

9393
# create a validation data loader
9494
val_ds = monai.data.Dataset(data=val_files, transform=val_transforms)

3d_classification/ignite/densenet_training_array.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,11 +68,11 @@ def main():
6868
print(type(im), im.shape, label)
6969

7070
# create DenseNet121, CrossEntropyLoss and Adam optimizer
71-
net = monai.networks.nets.densenet.densenet121(spatial_dims=3, in_channels=1, out_channels=2)
71+
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
72+
net = monai.networks.nets.densenet.densenet121(spatial_dims=3, in_channels=1, out_channels=2).to(device)
7273
loss = torch.nn.CrossEntropyLoss()
7374
lr = 1e-5
7475
opt = torch.optim.Adam(net.parameters(), lr)
75-
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
7676

7777
# Ignite trainer expects batch=(img, label) and returns output=loss at every iteration,
7878
# user can add output_transform to return other values, like: y_pred, y, etc.

3d_classification/ignite/densenet_training_dict.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,11 +86,11 @@ def main():
8686
print(check_data["img"].shape, check_data["label"])
8787

8888
# create DenseNet121, CrossEntropyLoss and Adam optimizer
89-
net = monai.networks.nets.densenet.densenet121(spatial_dims=3, in_channels=1, out_channels=2)
89+
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
90+
net = monai.networks.nets.densenet.densenet121(spatial_dims=3, in_channels=1, out_channels=2).to(device)
9091
loss = torch.nn.CrossEntropyLoss()
9192
lr = 1e-5
9293
opt = torch.optim.Adam(net.parameters(), lr)
93-
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
9494

9595
# Ignite trainer expects batch=(img, label) and returns output=loss at every iteration,
9696
# user can add output_transform to return other values, like: y_pred, y, etc.

3d_segmentation/ignite/unet_evaluation_array.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -60,8 +60,7 @@ def main(tempdir):
6060
channels=(16, 32, 64, 128, 256),
6161
strides=(2, 2, 2, 2),
6262
num_res_units=2,
63-
)
64-
net.to(device)
63+
).to(device)
6564

6665
# define sliding window size and batch size for windows inference
6766
roi_size = (96, 96, 96)
@@ -99,7 +98,7 @@ def _sliding_window_processor(engine, batch):
9998
file_saver.attach(evaluator)
10099

101100
# the model was trained by "unet_training_array" example
102-
ckpt_saver = CheckpointLoader(load_path="./runs_array/net_checkpoint_100.pth", load_dict={"net": net})
101+
ckpt_saver = CheckpointLoader(load_path="./runs_array/net_checkpoint_100.pt", load_dict={"net": net})
103102
ckpt_saver.attach(evaluator)
104103

105104
# sliding window inference for one image at every iteration

3d_segmentation/ignite/unet_evaluation_dict.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -67,8 +67,7 @@ def main(tempdir):
6767
channels=(16, 32, 64, 128, 256),
6868
strides=(2, 2, 2, 2),
6969
num_res_units=2,
70-
)
71-
net.to(device)
70+
).to(device)
7271

7372
# define sliding window size and batch size for windows inference
7473
roi_size = (96, 96, 96)
@@ -104,7 +103,7 @@ def _sliding_window_processor(engine, batch):
104103
output_transform=lambda output: predict_segmentation(output[0]),
105104
).attach(evaluator)
106105
# the model was trained by "unet_training_dict" example
107-
CheckpointLoader(load_path="./runs_dict/net_checkpoint_50.pth", load_dict={"net": net}).attach(evaluator)
106+
CheckpointLoader(load_path="./runs_dict/net_checkpoint_50.pt", load_dict={"net": net}).attach(evaluator)
108107

109108
# sliding window inference for one image at every iteration
110109
val_loader = DataLoader(

3d_segmentation/ignite/unet_training_array.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,18 +75,18 @@ def main(tempdir):
7575
val_loader = DataLoader(val_ds, batch_size=5, num_workers=8, pin_memory=torch.cuda.is_available())
7676

7777
# create UNet, DiceLoss and Adam optimizer
78+
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
7879
net = monai.networks.nets.UNet(
7980
dimensions=3,
8081
in_channels=1,
8182
out_channels=1,
8283
channels=(16, 32, 64, 128, 256),
8384
strides=(2, 2, 2, 2),
8485
num_res_units=2,
85-
)
86+
).to(device)
8687
loss = monai.losses.DiceLoss(sigmoid=True)
8788
lr = 1e-3
8889
opt = torch.optim.Adam(net.parameters(), lr)
89-
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
9090

9191
# Ignite trainer expects batch=(img, seg) and returns output=loss at every iteration,
9292
# user can add output_transform to return other values, like: y_pred, y, etc.

3d_segmentation/ignite/unet_training_dict.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -112,18 +112,18 @@ def main(tempdir):
112112
)
113113

114114
# create UNet, DiceLoss and Adam optimizer
115+
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
115116
net = monai.networks.nets.UNet(
116117
dimensions=3,
117118
in_channels=1,
118119
out_channels=1,
119120
channels=(16, 32, 64, 128, 256),
120121
strides=(2, 2, 2, 2),
121122
num_res_units=2,
122-
)
123+
).to(device)
123124
loss = monai.losses.DiceLoss(sigmoid=True)
124125
lr = 1e-3
125126
opt = torch.optim.Adam(net.parameters(), lr)
126-
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
127127

128128
# Ignite trainer expects batch=(img, seg) and returns output=loss at every iteration,
129129
# user can add output_transform to return other values, like: y_pred, y, etc.

3d_segmentation/unet_segmentation_3d_ignite.ipynb

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -223,7 +223,7 @@
223223
" ]\n",
224224
")\n",
225225
"\n",
226-
"# Define nifti dataset, dataloader.\n",
226+
"# Define nifti dataset, dataloader\n",
227227
"ds = ArrayDataset(images, imtrans, segs, segtrans)\n",
228228
"loader = torch.utils.data.DataLoader(\n",
229229
" ds, batch_size=10, num_workers=2, pin_memory=torch.cuda.is_available()\n",
@@ -245,15 +245,16 @@
245245
"metadata": {},
246246
"outputs": [],
247247
"source": [
248-
"# Create UNet, DiceLoss and Adam optimizer.\n",
248+
"# Create UNet, DiceLoss and Adam optimizer\n",
249+
"device = torch.device(\"cuda:0\")\n",
249250
"net = UNet(\n",
250251
" dimensions=3,\n",
251252
" in_channels=1,\n",
252253
" out_channels=1,\n",
253254
" channels=(16, 32, 64, 128, 256),\n",
254255
" strides=(2, 2, 2, 2),\n",
255256
" num_res_units=2,\n",
256-
")\n",
257+
").to(device)\n",
257258
"\n",
258259
"loss = DiceLoss(sigmoid=True)\n",
259260
"lr = 1e-3\n",
@@ -274,7 +275,6 @@
274275
"outputs": [],
275276
"source": [
276277
"# Create trainer\n",
277-
"device = torch.device(\"cuda:0\")\n",
278278
"trainer = ignite.engine.create_supervised_trainer(net, opt, loss, device, False)"
279279
]
280280
},

acceleration/distributed_training/unet_evaluation_ddp.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@ def evaluate(args):
107107

108108
# create UNet, DiceLoss and Adam optimizer
109109
device = torch.device(f"cuda:{args.local_rank}")
110+
torch.cuda.set_device(device)
110111
model = monai.networks.nets.UNet(
111112
dimensions=3,
112113
in_channels=1,
@@ -116,7 +117,7 @@ def evaluate(args):
116117
num_res_units=2,
117118
).to(device)
118119
# wrap the model with DistributedDataParallel module
119-
model = DistributedDataParallel(model, device_ids=[args.local_rank])
120+
model = DistributedDataParallel(model, device_ids=[device])
120121
# config mapping to expected GPU device
121122
map_location = {"cuda:0": f"cuda:{args.local_rank}"}
122123
# load model parameters to GPU device

acceleration/distributed_training/unet_evaluation_horovod.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,7 @@ def evaluate(args):
115115

116116
# create UNet, DiceLoss and Adam optimizer
117117
device = torch.device(f"cuda:{hvd.local_rank()}")
118+
torch.cuda.set_device(device)
118119
model = monai.networks.nets.UNet(
119120
dimensions=3,
120121
in_channels=1,

acceleration/distributed_training/unet_evaluation_workflows.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232
- Wrap Dataset with `DistributedSampler`, disable the `shuffle` in sampler and DataLoader.
3333
- Add `StatsHandler` and `SegmentationSaver` to the master process which is `dist.get_rank() == 0`.
3434
- ignite can automatically reduce metrics for distributed evaluation, refer to:
35-
https://github.com/pytorch/ignite/blob/v0.3.0/ignite/metrics/metric.py#L85
35+
https://github.com/pytorch/ignite/blob/v0.4.2/ignite/metrics/metric.py#L507
3636
3737
Note:
3838
`torch.distributed.launch` will launch `nnodes * nproc_per_node = world_size` processes in total.
@@ -121,6 +121,7 @@ def evaluate(args):
121121

122122
# create UNet, DiceLoss and Adam optimizer
123123
device = torch.device(f"cuda:{args.local_rank}")
124+
torch.cuda.set_device(device)
124125
net = monai.networks.nets.UNet(
125126
dimensions=3,
126127
in_channels=1,
@@ -130,7 +131,7 @@ def evaluate(args):
130131
num_res_units=2,
131132
).to(device)
132133
# wrap the model with DistributedDataParallel module
133-
net = DistributedDataParallel(net, device_ids=[args.local_rank])
134+
net = DistributedDataParallel(net, device_ids=[device])
134135

135136
val_post_transforms = Compose(
136137
[
@@ -141,7 +142,7 @@ def evaluate(args):
141142
)
142143
val_handlers = [
143144
CheckpointLoader(
144-
load_path="./runs/checkpoint_epoch=4.pth",
145+
load_path="./runs/checkpoint_epoch=4.pt",
145146
load_dict={"net": net},
146147
# config mapping to expected GPU device
147148
map_location={"cuda:0": f"cuda:{args.local_rank}"},

acceleration/distributed_training/unet_training_ddp.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,7 @@ def train(args):
127127

128128
# create UNet, DiceLoss and Adam optimizer
129129
device = torch.device(f"cuda:{args.local_rank}")
130+
torch.cuda.set_device(device)
130131
model = monai.networks.nets.UNet(
131132
dimensions=3,
132133
in_channels=1,
@@ -138,7 +139,7 @@ def train(args):
138139
loss_function = monai.losses.DiceLoss(sigmoid=True).to(device)
139140
optimizer = torch.optim.Adam(model.parameters(), 1e-3)
140141
# wrap the model with DistributedDataParallel module
141-
model = DistributedDataParallel(model, device_ids=[args.local_rank])
142+
model = DistributedDataParallel(model, device_ids=[device])
142143

143144
# start a typical PyTorch training
144145
epoch_loss_values = list()

acceleration/distributed_training/unet_training_horovod.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,7 @@ def train(args):
132132

133133
# create UNet, DiceLoss and Adam optimizer
134134
device = torch.device(f"cuda:{hvd.local_rank()}")
135+
torch.cuda.set_device(device)
135136
model = monai.networks.nets.UNet(
136137
dimensions=3,
137138
in_channels=1,

acceleration/distributed_training/unet_training_smartcache.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,7 @@ def train(args):
187187

188188
# create UNet, DiceLoss and Adam optimizer
189189
device = torch.device(f"cuda:{args.local_rank}")
190+
torch.cuda.set_device(device)
190191
model = monai.networks.nets.UNet(
191192
dimensions=3,
192193
in_channels=1,
@@ -198,7 +199,7 @@ def train(args):
198199
loss_function = monai.losses.DiceLoss(sigmoid=True).to(device)
199200
optimizer = torch.optim.Adam(model.parameters(), 1e-3)
200201
# wrap the model with DistributedDataParallel module
201-
model = DistributedDataParallel(model, device_ids=[args.local_rank])
202+
model = DistributedDataParallel(model, device_ids=[device])
202203

203204
# start a typical PyTorch training
204205
epoch_loss_values = list()

acceleration/distributed_training/unet_training_workflows.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232
Instead, `SupervisedTrainer` shuffles data by `train_sampler.set_epoch(epoch)` before every epoch.
3333
- Add `StatsHandler` and `CheckpointHandler` to the master process which is `dist.get_rank() == 0`.
3434
- ignite can automatically reduce metrics for distributed training, refer to:
35-
https://github.com/pytorch/ignite/blob/v0.3.0/ignite/metrics/metric.py#L85
35+
https://github.com/pytorch/ignite/blob/v0.4.2/ignite/metrics/metric.py#L507
3636
3737
Note:
3838
`torch.distributed.launch` will launch `nnodes * nproc_per_node = world_size` processes in total.
@@ -134,6 +134,7 @@ def train(args):
134134

135135
# create UNet, DiceLoss and Adam optimizer
136136
device = torch.device(f"cuda:{args.local_rank}")
137+
torch.cuda.set_device(device)
137138
net = monai.networks.nets.UNet(
138139
dimensions=3,
139140
in_channels=1,
@@ -142,11 +143,11 @@ def train(args):
142143
strides=(2, 2, 2, 2),
143144
num_res_units=2,
144145
).to(device)
145-
loss = monai.losses.DiceLoss(sigmoid=True).to(device)
146+
loss = monai.losses.DiceLoss(sigmoid=True)
146147
opt = torch.optim.Adam(net.parameters(), 1e-3)
147148
lr_scheduler = torch.optim.lr_scheduler.StepLR(opt, step_size=2, gamma=0.1)
148149
# wrap the model with DistributedDataParallel module
149-
net = DistributedDataParallel(net, device_ids=[args.local_rank])
150+
net = DistributedDataParallel(net, device_ids=[device])
150151

151152
train_post_transforms = Compose(
152153
[

0 commit comments

Comments
 (0)