Skip to content

Commit aaf14cc

Browse files
committed
Check for an AMD GPU device and a ROCm installation with rocminfo
Signed-off-by: Vicky Tsang <[email protected]>
1 parent 07013c0 commit aaf14cc

File tree

3 files changed

+50
-19
lines changed

3 files changed

+50
-19
lines changed

monai/deploy/packager/util.py

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
from monai.deploy.exceptions import WrongValueError
2424
from monai.deploy.packager.constants import DefaultValues
2525
from monai.deploy.packager.templates import Template
26+
from monai.deploy.utils.deviceutil import has_rocm
2627
from monai.deploy.utils.fileutil import checksum
2728
from monai.deploy.utils.importutil import dist_module_path, dist_requires, get_application
2829
from monai.deploy.utils.spinner import ProgressSpinner
@@ -42,11 +43,10 @@ def verify_base_image(base_image: str) -> str:
4243
str: returns string identifier of the dockerfile template to build MAP
4344
if valid base image provided, returns empty string otherwise
4445
"""
45-
import torch
46-
if "AMD" not in torch.cuda.get_device_name(0):
47-
valid_prefixes = {"nvcr.io/nvidia/cuda": "ubuntu", "nvcr.io/nvidia/pytorch": "pytorch"}
48-
else:
46+
if has_rocm():
4947
valid_prefixes = {"rocm": "ubuntu", "rocm/pytorch": "pytorch"}
48+
else:
49+
valid_prefixes = {"nvcr.io/nvidia/cuda": "ubuntu", "nvcr.io/nvidia/pytorch": "pytorch"}
5050

5151
for prefix, template in valid_prefixes.items():
5252
if prefix in base_image:
@@ -93,18 +93,17 @@ def initialize_args(args: Namespace) -> Dict:
9393
if args.base:
9494
dockerfile_type = verify_base_image(args.base)
9595
if not dockerfile_type:
96-
import torch
97-
if "AMD" not in torch.cuda.get_device_name(0):
96+
if has_rocm():
9897
logger.error(
9998
"Provided base image '{}' is not supported \n \
100-
Please provide a Cuda or Pytorch image from https://ngc.nvidia.com/ (nvcr.io/nvidia)".format(
99+
Please provide a ROCm or Pytorch image from https://hub.docker.com/r/rocm/pytorch".format(
101100
args.base
102101
)
103102
)
104103
else:
105104
logger.error(
106105
"Provided base image '{}' is not supported \n \
107-
Please provide a ROCm or Pytorch image from https://hub.docker.com/r/rocm/pytorch".format(
106+
Please provide a Cuda or Pytorch image from https://ngc.nvidia.com/ (nvcr.io/nvidia)".format(
108107
args.base
109108
)
110109
)

monai/deploy/runner/runner.py

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
from typing import Tuple
2121

2222
from monai.deploy.runner.utils import get_requested_gpus, run_cmd, verify_image
23+
from monai.deploy.utils.deviceutil import has_rocm
2324

2425
logger = logging.getLogger("app_runner")
2526

@@ -87,9 +88,8 @@ def run_app(map_name: str, input_path: Path, output_path: Path, app_info: dict,
8788
# Use nvidia-docker if GPU resources are requested
8889
requested_gpus = get_requested_gpus(pkg_info)
8990
if requested_gpus > 0:
90-
import torch
91-
if "AMD" not in torch.cuda.get_device_name(0):
92-
cmd = "nvidia-docker run --rm -a STDERR"
91+
if not has_rocm():
92+
cmd = "nvidia-docker run --rm -a STDERR"
9393

9494
if not quiet:
9595
cmd += " -a STDOUT"
@@ -162,14 +162,13 @@ def pkg_specific_dependency_verification(pkg_info: dict) -> bool:
162162
"""
163163
requested_gpus = get_requested_gpus(pkg_info)
164164
if requested_gpus > 0:
165-
import torch
166-
if "AMD" not in torch.cuda.get_device_name(0):
167-
# check for nvidia-docker
168-
prog = "nvidia-docker"
169-
logger.info('--> Verifying if "%s" is installed...\n', prog)
170-
if not shutil.which(prog):
171-
logger.error('ERROR: "%s" not installed, please install nvidia-docker.', prog)
172-
return False
165+
if not has_rocm():
166+
# check for nvidia-docker
167+
prog = "nvidia-docker"
168+
logger.info('--> Verifying if "%s" is installed...\n', prog)
169+
if not shutil.which(prog):
170+
logger.error('ERROR: "%s" not installed, please install nvidia-docker.', prog)
171+
return False
173172

174173
return True
175174

monai/deploy/utils/deviceutil.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
# Copyright 2023 MONAI Consortium
2+
# Licensed under the Apache License, Version 2.0 (the "License");
3+
# you may not use this file except in compliance with the License.
4+
# You may obtain a copy of the License at
5+
# http://www.apache.org/licenses/LICENSE-2.0
6+
# Unless required by applicable law or agreed to in writing, software
7+
# distributed under the License is distributed on an "AS IS" BASIS,
8+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9+
# See the License for the specific language governing permissions and
10+
# limitations under the License.
11+
12+
import subprocess
13+
14+
15+
def has_rocm() -> bool:
    """Return True if ROCm is installed and an AMD GPU device is detected.

    Runs the ``rocminfo`` command-line tool and scans its output for a
    "Device Type" line that reports a GPU agent.

    Returns:
        True if ROCm is installed and a GPU device is detected, otherwise False.
    """
    try:
        # Discard stderr so rocminfo warnings do not leak to the console.
        process = subprocess.run(["rocminfo"], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
    except OSError:
        # rocminfo is not installed (FileNotFoundError) or cannot be executed:
        # treat as "no ROCm" instead of crashing the caller.
        return False

    for line_in in process.stdout.decode().splitlines():
        if "Device Type" in line_in and "GPU" in line_in:
            return True

    return False
30+
31+
32+
# Allow a quick manual check from the command line.
if __name__ == "__main__":
    print(has_rocm())

0 commit comments

Comments
 (0)