Skip to content

enable make check for folder examples/Pytorch_DDP #33

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Aug 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/pnetcdf_c_master.yml
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ jobs:
pip install numpy cython cftime pytest twine wheel check-manifest
export MPICC=$MPICH_DIR/bin/mpicc
pip install mpi4py
pip install torch

- name: Install PnetCDF-Python
run: |
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/pnetcdf_c_official.yml
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ jobs:
pip install numpy cython cftime pytest twine wheel check-manifest
export MPICC=$MPICH_DIR/bin/mpicc
pip install mpi4py
pip install torch

- name: Install PnetCDF-Python
run: |
Expand Down
15 changes: 9 additions & 6 deletions examples/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -28,29 +28,32 @@ OUTPUT_DIR = _tmp_output
all:

# Run the 4-process example tests in this folder, then the "check" target of
# the Pytorch_DDP sub-directory.  $(MAKE) is used instead of a literal "make"
# so that -j/-n and command-line variable overrides reach the sub-make.
check: ptest4
	cd Pytorch_DDP && $(MAKE) check

# Run the 3-, 4- and 8-process example tests in this folder, then the "ptests"
# target of the Pytorch_DDP sub-directory.  $(MAKE) is used instead of a
# literal "make" so that -j/-n and command-line variable overrides reach the
# sub-make.
ptests: ptest3 ptest4 ptest8
	cd Pytorch_DDP && $(MAKE) ptests

ptest3:
@mkdir -p ${OUTPUT_DIR}
@echo "==========================================================="
@echo "======================================================================"
@echo " examples: Parallel testing on 3 MPI processes"
@echo "==========================================================="
@echo "======================================================================"
@${TESTS_ENVIRONMENT} export NPROC=3; ./parallel_run.sh ${OUTPUT_DIR} || exit 1
@echo ""

ptest4:
@mkdir -p ${OUTPUT_DIR}
@echo "==========================================================="
@echo "======================================================================"
@echo " examples: Parallel testing on 4 MPI processes"
@echo "==========================================================="
@echo "======================================================================"
@${TESTS_ENVIRONMENT} export NPROC=4; ./parallel_run.sh ${OUTPUT_DIR} || exit 1
@echo ""

ptest8:
@mkdir -p ${OUTPUT_DIR}
@echo "==========================================================="
@echo "======================================================================"
@echo " examples: Parallel testing on 8 MPI processes"
@echo "==========================================================="
@echo "======================================================================"
@${TESTS_ENVIRONMENT} export NPROC=8; ./parallel_run.sh ${OUTPUT_DIR} || exit 1
@echo ""

Expand Down
45 changes: 45 additions & 0 deletions examples/Pytorch_DDP/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#
# Copyright (C) 2024, Northwestern University and Argonne National Laboratory
# See COPYRIGHT notice in top-level directory.
#

# Space-separated list of test programs handed to parallel_run.sh.
check_PROGRAMS = torch_ddp_skeleton.py


# Shell prefix placed in front of every parallel_run.sh invocation; it exports
# check_PROGRAMS and PNETCDF_DIR into the environment of that shell.
TESTS_ENVIRONMENT  = export check_PROGRAMS="${check_PROGRAMS}";
TESTS_ENVIRONMENT += export PNETCDF_DIR="${PNETCDF_DIR}";

# Scratch folder created before each test run and removed by "clean".
OUTPUT_DIR = _tmp_output

# Every target below names a command, not a file it creates.  Declaring them
# phony keeps them working even if a file called e.g. "check" or "clean"
# appears in this folder.
.PHONY: all check ptests ptest3 ptest4 ptest8 clean

# Nothing to build for this folder.
all:

# "check" runs the 4-process test only; "ptests" runs all three process counts.
check: ptest4
ptests: ptest3 ptest4 ptest8

# Each ptestN target creates the output folder, prints a banner and runs
# parallel_run.sh with NPROC=N exported; "|| exit 1" fails the target when the
# script returns non-zero.
ptest3:
	@mkdir -p ${OUTPUT_DIR}
	@echo "======================================================================"
	@echo "    examples/Pytorch_DDP: Parallel testing on 3 MPI processes"
	@echo "======================================================================"
	@${TESTS_ENVIRONMENT} export NPROC=3; ./parallel_run.sh ${OUTPUT_DIR} || exit 1
	@echo ""

ptest4:
	@mkdir -p ${OUTPUT_DIR}
	@echo "======================================================================"
	@echo "    examples/Pytorch_DDP: Parallel testing on 4 MPI processes"
	@echo "======================================================================"
	@${TESTS_ENVIRONMENT} export NPROC=4; ./parallel_run.sh ${OUTPUT_DIR} || exit 1
	@echo ""

ptest8:
	@mkdir -p ${OUTPUT_DIR}
	@echo "======================================================================"
	@echo "    examples/Pytorch_DDP: Parallel testing on 8 MPI processes"
	@echo "======================================================================"
	@${TESTS_ENVIRONMENT} export NPROC=8; ./parallel_run.sh ${OUTPUT_DIR} || exit 1
	@echo ""

# Remove the scratch output folder.
clean:
	rm -rf ${OUTPUT_DIR}

44 changes: 44 additions & 0 deletions examples/Pytorch_DDP/parallel_run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#!/bin/bash
#
# Copyright (C) 2024, Northwestern University and Argonne National Laboratory
# See COPYRIGHT notice in top-level directory.
#
# Runs every program listed in $check_PROGRAMS under mpiexec and prints one
# PASS/FAIL line per program.
#
# Usage: parallel_run.sh [OUT_DIR]
#   OUT_DIR         optional output folder; must already exist (default ".")
# Environment:
#   NPROC           number of MPI processes to launch (default 4)
#   check_PROGRAMS  space-separated list of Python programs to run
# Exit status: 0 when all programs pass; otherwise the status of the first
# failing program (or 1 when OUT_DIR does not exist).

# Exit immediately if a command exits with a non-zero status.
set -e

# Default to 4 MPI processes when NPROC is unset or empty.
if test "x$NPROC" = x ; then
    NPROC=4
fi

# get output folder from command line
if test "$#" -gt 0 ; then
    OUT_DIR="$1"
    # check if output folder exists
    if ! test -d "$OUT_DIR" ; then
        echo "Error: output folder \"$OUT_DIR\" does not exist."
        exit 1
    fi
else
    # output folder is not set at command line, use current folder
    OUT_DIR="."
fi
# echo "OUT_DIR=$OUT_DIR"

for prog in $check_PROGRAMS; do
    printf '%-60s' "Testing $prog"

    # Build the command afresh for every program, so that a program without a
    # special case below neither runs an empty command nor re-runs the
    # previous program's command.
    CMD="mpiexec -n $NPROC python $prog"
    if test "$prog" = "torch_ddp_skeleton.py" ; then
        # run this program in quiet mode
        CMD="$CMD -q"
    fi

    # Run the command as an `if` condition: `set -e` does not abort on a
    # failing condition, so the FAIL line is actually printed before exiting.
    if $CMD ; then
        echo " ---- PASS"
    else
        status=$?
        echo " ---- FAIL"
        exit $status
    fi
done

16 changes: 14 additions & 2 deletions examples/Pytorch_DDP/torch_ddp_skeleton.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,13 @@
# This is a skeleton program to show how to run Pytorch distributed environment
# with MPI

import os
import os, argparse
import torch
import torch.distributed as dist
from mpi4py import MPI

verbose = True

class distributed():
def get_size(self):
if dist.is_available() and dist.is_initialized():
Expand Down Expand Up @@ -217,13 +219,23 @@ def init_parallel():

#----< main() >----------------------------------------------------------------
def main():
global verbose

# initialize parallel environment
comm, device = init_parallel()

rank = comm.get_rank()
nprocs = comm.get_size()

print("nprocs = ", nprocs, " rank = ",rank," device = ", device)
# Get command-line arguments
args = None
parser = argparse.ArgumentParser()
parser.add_argument("-q", help="Quiet mode (reports when fail)", action="store_true")
args = parser.parse_args()
if args.q: verbose = False

if verbose:
print("nprocs = ", nprocs, " rank = ",rank," device = ", device)

comm.finalize()

Expand Down
12 changes: 6 additions & 6 deletions test/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -58,25 +58,25 @@ ptests: ptest3 ptest4 ptest8

ptest3:
@mkdir -p ${OUTPUT_DIR}
@echo "==========================================================="
@echo "======================================================================"
@echo " test: Parallel testing on 3 MPI processes"
@echo "==========================================================="
@echo "======================================================================"
@${TESTS_ENVIRONMENT} export NPROC=3; ./parallel_run.sh ${OUTPUT_DIR} || exit 1
@echo ""

ptest4:
@mkdir -p ${OUTPUT_DIR}
@echo "==========================================================="
@echo "======================================================================"
@echo " test: Parallel testing on 4 MPI processes"
@echo "==========================================================="
@echo "======================================================================"
@${TESTS_ENVIRONMENT} export NPROC=4; ./parallel_run.sh ${OUTPUT_DIR} || exit 1
@echo ""

ptest8:
@mkdir -p ${OUTPUT_DIR}
@echo "==========================================================="
@echo "======================================================================"
@echo " test: Parallel testing on 8 MPI processes"
@echo "==========================================================="
@echo "======================================================================"
@${TESTS_ENVIRONMENT} export NPROC=8; ./parallel_run.sh ${OUTPUT_DIR} || exit 1
@echo ""

Expand Down
Loading