Skip to content

Commit 9441bf0

Browse files
committed
[DLMED] update horovod ddp
Signed-off-by: Nic Ma <[email protected]>
1 parent fa008a8 commit 9441bf0

File tree

2 files changed

+5
-8
lines changed

2 files changed

+5
-8
lines changed

acceleration/distributed_training/unet_evaluation_horovod.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@
3535
Example script to execute this program (it only needs to be run on the master node):
3636
`horovodrun -np 16 -H server1:4,server2:4,server3:4,server4:4 python unet_evaluation_horovod.py -d "./testdata"`
3737
38-
This example was tested with [Ubuntu 16.04/20.04], [NCCL 2.6.3], [horovod 0.19.5].
38+
This example was tested with [Ubuntu 16.04/20.04], [NCCL 2.6.3], [horovod 0.25.0].
3939
4040
Referring to: https://github.com/horovod/horovod/blob/master/examples/pytorch_mnist.py
4141
@@ -56,7 +56,7 @@
5656
from monai.data import DataLoader, Dataset, create_test_image_3d, decollate_batch
5757
from monai.inferers import sliding_window_inference
5858
from monai.metrics import DiceMetric
59-
from monai.transforms import Activations, AsChannelFirstd, AsDiscrete, Compose, LoadImaged, ScaleIntensityd, EnsureTyped, EnsureType
59+
from monai.transforms import Activations, AsChannelFirstd, AsDiscrete, Compose, LoadImaged, ScaleIntensityd, EnsureType
6060

6161

6262
def evaluate(args):
@@ -88,7 +88,6 @@ def evaluate(args):
8888
LoadImaged(keys=["img", "seg"]),
8989
AsChannelFirstd(keys=["img", "seg"], channel_dim=-1),
9090
ScaleIntensityd(keys="img"),
91-
EnsureTyped(keys=["img", "seg"]),
9291
]
9392
)
9493

@@ -156,7 +155,7 @@ def main():
156155
evaluate(args=args)
157156

158157

159-
# Example script to execute this program only on the master node:
158+
# Example script to execute this program on 4 nodes (the command below only needs to be run on the master node):
160159
# horovodrun -np 16 -H server1:4,server2:4,server3:4,server4:4 python unet_evaluation_horovod.py -d "./testdata"
161160
if __name__ == "__main__":
162161
main()

acceleration/distributed_training/unet_training_horovod.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939
Example script to execute this program (it only needs to be run on the master node):
4040
`horovodrun -np 16 -H server1:4,server2:4,server3:4,server4:4 python unet_training_horovod.py -d "./testdata"`
4141
42-
This example was tested with [Ubuntu 16.04/20.04], [NCCL 2.6.3], [horovod 0.19.5].
42+
This example was tested with [Ubuntu 16.04/20.04], [NCCL 2.6.3], [horovod 0.25.0].
4343
4444
Referring to: https://github.com/horovod/horovod/blob/master/examples/pytorch_mnist.py
4545
@@ -66,7 +66,6 @@
6666
RandCropByPosNegLabeld,
6767
RandRotate90d,
6868
ScaleIntensityd,
69-
EnsureTyped,
7069
)
7170

7271

@@ -106,7 +105,6 @@ def train(args):
106105
keys=["img", "seg"], label_key="seg", spatial_size=[96, 96, 96], pos=1, neg=1, num_samples=4
107106
),
108107
RandRotate90d(keys=["img", "seg"], prob=0.5, spatial_axes=[0, 2]),
109-
EnsureTyped(keys=["img", "seg"]),
110108
]
111109
)
112110

@@ -188,7 +186,7 @@ def main():
188186
train(args=args)
189187

190188

191-
# Example script to execute this program only on the master node:
189+
# Example script to execute this program on 4 nodes (the command below only needs to be run on the master node):
192190
# horovodrun -np 16 -H server1:4,server2:4,server3:4,server4:4 python unet_training_horovod.py -d "./testdata"
193191
if __name__ == "__main__":
194192
main()

0 commit comments

Comments
 (0)