Skip to content

Commit 3194761

Browse files
committed
[AMDGPU][OpenMP] Add amdgpu-arch tool to list AMD GPUs installed
This patch adds a new clang tool named amdgpu-arch, which uses HSA to detect the installed AMD GPUs and report their architecture (the value for -march). The tool is built only if the system has HSA installed. The value printed by amdgpu-arch is used to fill in -march when it is not explicitly provided via -Xopenmp-target. Reviewed By: JonChesterfield, gregrodgers Differential Revision: https://reviews.llvm.org/D99949
1 parent 6533451 commit 3194761

File tree

14 files changed

+267
-8
lines changed

14 files changed

+267
-8
lines changed

clang/include/clang/Basic/DiagnosticDriverKinds.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,8 @@ def err_drv_no_hip_runtime : Error<
6767
"cannot find HIP runtime. Provide its path via --rocm-path, or pass "
6868
"-nogpuinc to build without HIP runtime.">;
6969

70+
def err_drv_undetermined_amdgpu_arch : Error<
71+
"Cannot determine AMDGPU architecture: %0. Consider passing it via --march.">;
7072
def err_drv_cuda_version_unsupported : Error<
7173
"GPU arch %0 is supported by CUDA versions between %1 and %2 (inclusive), "
7274
"but installation at %3 is %4. Use --cuda-path to specify a different CUDA "

clang/include/clang/Driver/Options.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -924,6 +924,8 @@ def rocm_path_EQ : Joined<["--"], "rocm-path=">, Group<i_Group>,
924924
HelpText<"ROCm installation path, used for finding and automatically linking required bitcode libraries.">;
925925
def hip_path_EQ : Joined<["--"], "hip-path=">, Group<i_Group>,
926926
HelpText<"HIP runtime installation path, used for finding HIP version and adding HIP include path.">;
927+
def amdgpu_arch_tool_EQ : Joined<["--"], "amdgpu-arch-tool=">, Group<i_Group>,
928+
HelpText<"Tool used for detecting AMD GPU arch in the system.">;
927929
def rocm_device_lib_path_EQ : Joined<["--"], "rocm-device-lib-path=">, Group<Link_Group>,
928930
HelpText<"ROCm device library path. Alternative to rocm-path.">;
929931
def : Joined<["--"], "hip-device-lib-path=">, Alias<rocm_device_lib_path_EQ>;

clang/lib/Driver/ToolChains/AMDGPU.cpp

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,16 @@
1212
#include "clang/Basic/TargetID.h"
1313
#include "clang/Driver/Compilation.h"
1414
#include "clang/Driver/DriverDiagnostic.h"
15+
#include "clang/Driver/Options.h"
1516
#include "llvm/Option/ArgList.h"
17+
#include "llvm/Support/Error.h"
18+
#include "llvm/Support/FileUtilities.h"
19+
#include "llvm/Support/LineIterator.h"
1620
#include "llvm/Support/Path.h"
1721
#include "llvm/Support/VirtualFileSystem.h"
22+
#include <system_error>
23+
24+
#define AMDGPU_ARCH_PROGRAM_NAME "amdgpu-arch"
1825

1926
using namespace clang::driver;
2027
using namespace clang::driver::tools;
@@ -715,6 +722,78 @@ void AMDGPUToolChain::checkTargetID(
715722
}
716723
}
717724

725+
/// Runs the amdgpu-arch tool and collects the GPU architectures it prints.
///
/// The tool path is taken from --amdgpu-arch-tool= when that option is given,
/// otherwise it is looked up with GetProgramPath. The tool's stdout is
/// redirected into a temporary file, and every line read back from that file
/// is appended to \p GPUArchs.
///
/// \param Args driver arguments, consulted for --amdgpu-arch-tool=.
/// \param GPUArchs receives one entry per line of tool output.
/// \returns success, or a string error when the temporary file cannot be
/// created, the tool cannot be executed, crashes or exits non-zero, or its
/// captured output cannot be read.
llvm::Error
AMDGPUToolChain::detectSystemGPUs(const ArgList &Args,
                                  SmallVector<std::string, 1> &GPUArchs) const {
  std::string Program;
  if (Arg *A = Args.getLastArg(options::OPT_amdgpu_arch_tool_EQ))
    Program = A->getValue();
  else
    Program = GetProgramPath(AMDGPU_ARCH_PROGRAM_NAME);

  // Without a valid temporary file there is nowhere to capture stdout, so a
  // failure here must be reported instead of silently continuing with an
  // empty path.
  llvm::SmallString<64> OutputFile;
  if (std::error_code EC = llvm::sys::fs::createTemporaryFile(
          "print-system-gpus", "" /* No Suffix */, OutputFile)) {
    return llvm::createStringError(
        EC, "Failed to create temporary file for stdout of " + Program + ": " +
                EC.message());
  }
  // Deletes the temporary file when this function returns.
  llvm::FileRemover OutputRemover(OutputFile.c_str());

  // Order is stdin, stdout, stderr: only stdout is captured in the file.
  llvm::Optional<llvm::StringRef> Redirects[] = {
      {""},
      StringRef(OutputFile),
      {""},
  };

  std::string ErrorMessage;
  if (int Result = llvm::sys::ExecuteAndWait(
          Program.c_str(), {}, {}, Redirects, /* SecondsToWait */ 0,
          /*MemoryLimit*/ 0, &ErrorMessage)) {
    if (Result > 0) {
      ErrorMessage = "Exited with error code " + std::to_string(Result);
    } else if (Result == -1) {
      ErrorMessage = "Execute failed: " + ErrorMessage;
    } else {
      ErrorMessage = "Crashed: " + ErrorMessage;
    }

    return llvm::createStringError(std::error_code(),
                                   Program + ": " + ErrorMessage);
  }

  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> OutputBuf =
      llvm::MemoryBuffer::getFile(OutputFile.c_str());
  if (!OutputBuf) {
    return llvm::createStringError(OutputBuf.getError(),
                                   "Failed to read stdout of " + Program +
                                       ": " + OutputBuf.getError().message());
  }

  // One architecture name is expected per output line.
  for (llvm::line_iterator LineIt(**OutputBuf); !LineIt.is_at_end(); ++LineIt) {
    GPUArchs.push_back(LineIt->str());
  }
  return llvm::Error::success();
}
772+
773+
/// Determines the architecture of the AMD GPU(s) installed in this system.
///
/// \param Args driver arguments, forwarded to detectSystemGPUs.
/// \param GPUArch set to the first reported architecture whenever at least
/// one GPU was reported.
/// \returns success when detection succeeded and all reported GPUs share one
/// architecture; otherwise the detection error, or a string error when no GPU
/// was reported or the reported architectures differ.
llvm::Error AMDGPUToolChain::getSystemGPUArch(const ArgList &Args,
                                              std::string &GPUArch) const {
  SmallVector<std::string, 1> Detected;
  if (llvm::Error DetectErr = detectSystemGPUs(Args, Detected))
    return DetectErr;

  if (Detected.empty())
    return llvm::createStringError(std::error_code(),
                                   "No AMD GPU detected in the system");

  const std::string &FirstArch = Detected.front();
  GPUArch = FirstArch;

  // A single -march value can only describe one architecture, so a system
  // with mixed architectures is rejected.
  for (const std::string &Arch : Detected) {
    if (Arch != FirstArch)
      return llvm::createStringError(
          std::error_code(), "Multiple AMD GPUs found with different archs");
  }
  return llvm::Error::success();
}
796+
718797
void ROCMToolChain::addClangTargetOptions(
719798
const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
720799
Action::OffloadKind DeviceOffloadingKind) const {

clang/lib/Driver/ToolChains/AMDGPU.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,12 +100,20 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUToolChain : public Generic_ELF {
100100
/// Should skip argument.
101101
bool shouldSkipArgument(const llvm::opt::Arg *Arg) const;
102102

103+
/// Uses amdgpu-arch tool to get arch of the system GPU. Will return error
104+
/// if unable to find one.
105+
llvm::Error getSystemGPUArch(const llvm::opt::ArgList &Args,
106+
std::string &GPUArch) const;
107+
103108
protected:
104109
/// Check and diagnose invalid target ID specified by -mcpu.
105110
void checkTargetID(const llvm::opt::ArgList &DriverArgs) const;
106111

107112
/// Get GPU arch from -mcpu without checking.
108113
StringRef getGPUArch(const llvm::opt::ArgList &DriverArgs) const;
114+
115+
llvm::Error detectSystemGPUs(const llvm::opt::ArgList &Args,
116+
SmallVector<std::string, 1> &GPUArchs) const;
109117
};
110118

111119
class LLVM_LIBRARY_VISIBILITY ROCMToolChain : public AMDGPUToolChain {

clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp

Lines changed: 33 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,14 @@
1010
#include "AMDGPU.h"
1111
#include "CommonArgs.h"
1212
#include "InputInfo.h"
13+
#include "clang/Basic/DiagnosticDriver.h"
1314
#include "clang/Driver/Compilation.h"
1415
#include "clang/Driver/Driver.h"
1516
#include "clang/Driver/DriverDiagnostic.h"
1617
#include "clang/Driver/Options.h"
1718
#include "llvm/Support/FileSystem.h"
19+
#include "llvm/Support/FormatAdapters.h"
20+
#include "llvm/Support/FormatVariadic.h"
1821
#include "llvm/Support/Path.h"
1922

2023
using namespace clang::driver;
@@ -66,6 +69,18 @@ static void addLLCOptArg(const llvm::opt::ArgList &Args,
6669
CmdArgs.push_back(Args.MakeArgString("-O" + OOpt));
6770
}
6871
}
72+
73+
static bool checkSystemForAMDGPU(const ArgList &Args, const AMDGPUToolChain &TC,
74+
std::string &GPUArch) {
75+
if (auto Err = TC.getSystemGPUArch(Args, GPUArch)) {
76+
std::string ErrMsg =
77+
llvm::formatv("{0}", llvm::fmt_consume(std::move(Err)));
78+
TC.getDriver().Diag(diag::err_drv_undetermined_amdgpu_arch) << ErrMsg;
79+
return false;
80+
}
81+
82+
return true;
83+
}
6984
} // namespace
7085

7186
const char *AMDGCN::OpenMPLinker::constructLLVMLinkCommand(
@@ -145,17 +160,23 @@ void AMDGCN::OpenMPLinker::ConstructJob(Compilation &C, const JobAction &JA,
145160
const InputInfoList &Inputs,
146161
const ArgList &Args,
147162
const char *LinkingOutput) const {
163+
const ToolChain &TC = getToolChain();
148164
assert(getToolChain().getTriple().isAMDGCN() && "Unsupported target");
149165

150-
StringRef GPUArch = Args.getLastArgValue(options::OPT_march_EQ);
151-
assert(GPUArch.startswith("gfx") && "Unsupported sub arch");
166+
const toolchains::AMDGPUOpenMPToolChain &AMDGPUOpenMPTC =
167+
static_cast<const toolchains::AMDGPUOpenMPToolChain &>(TC);
168+
169+
std::string GPUArch = Args.getLastArgValue(options::OPT_march_EQ).str();
170+
if (GPUArch.empty()) {
171+
if (!checkSystemForAMDGPU(Args, AMDGPUOpenMPTC, GPUArch))
172+
return;
173+
}
152174

153175
// Prefix for temporary file name.
154176
std::string Prefix;
155177
for (const auto &II : Inputs)
156178
if (II.isFilename())
157-
Prefix =
158-
llvm::sys::path::stem(II.getFilename()).str() + "-" + GPUArch.str();
179+
Prefix = llvm::sys::path::stem(II.getFilename()).str() + "-" + GPUArch;
159180
assert(Prefix.length() && "no linker inputs are files ");
160181

161182
// Each command outputs different files.
@@ -186,18 +207,22 @@ void AMDGPUOpenMPToolChain::addClangTargetOptions(
186207
Action::OffloadKind DeviceOffloadingKind) const {
187208
HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind);
188209

189-
StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
190-
assert(!GpuArch.empty() && "Must have an explicit GPU arch.");
210+
std::string GPUArch = DriverArgs.getLastArgValue(options::OPT_march_EQ).str();
211+
if (GPUArch.empty()) {
212+
if (!checkSystemForAMDGPU(DriverArgs, *this, GPUArch))
213+
return;
214+
}
215+
191216
assert(DeviceOffloadingKind == Action::OFK_OpenMP &&
192217
"Only OpenMP offloading kinds are supported.");
193218

194219
CC1Args.push_back("-target-cpu");
195-
CC1Args.push_back(DriverArgs.MakeArgStringRef(GpuArch));
220+
CC1Args.push_back(DriverArgs.MakeArgStringRef(GPUArch));
196221
CC1Args.push_back("-fcuda-is-device");
197222

198223
if (DriverArgs.hasArg(options::OPT_nogpulib))
199224
return;
200-
std::string BitcodeSuffix = "amdgcn-" + GpuArch.str();
225+
std::string BitcodeSuffix = "amdgcn-" + GPUArch;
201226
addOpenMPDeviceRTL(getDriver(), DriverArgs, CC1Args, BitcodeSuffix,
202227
getTriple());
203228
}
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
#!/usr/bin/env sh
# Mock amdgpu-arch tool: reports two GPUs with different architectures and
# exits successfully.
echo gfx908
echo gfx906
# `exit`, not `return`: `return` is only defined inside a function or a
# sourced script, and some shells fail with a non-zero status otherwise.
exit 0
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
#!/usr/bin/env sh
# Mock amdgpu-arch tool: prints nothing and fails with exit status 1.
# `exit`, not `return`: `return` outside a function or sourced script has no
# defined exit status, so the status could differ from the intended 1.
exit 1
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
#!/usr/bin/env sh
# Mock amdgpu-arch tool: reports a single gfx906 GPU and exits successfully.
echo "gfx906"
# `exit`, not `return`: `return` is only defined inside a function or a
# sourced script, and some shells fail with a non-zero status otherwise.
exit 0
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
#!/usr/bin/env sh
# Mock amdgpu-arch tool: reports two GPUs of the same architecture (gfx908)
# and exits successfully.
echo gfx908
echo gfx908
# `exit`, not `return`: `return` is only defined inside a function or a
# sourced script, and some shells fail with a non-zero status otherwise.
exit 0
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
// REQUIRES: system-linux
2+
// REQUIRES: x86-registered-target
3+
// REQUIRES: amdgpu-registered-target
4+
// REQUIRES: shell
5+
6+
// RUN: mkdir -p %t
7+
// RUN: rm -f %t/amdgpu_arch_fail %t/amdgpu_arch_different
8+
// RUN: cp %S/Inputs/amdgpu-arch/amdgpu_arch_fail %t/
9+
// RUN: cp %S/Inputs/amdgpu-arch/amdgpu_arch_different %t/
10+
// RUN: echo '#!/usr/bin/env sh' > %t/amdgpu_arch_empty
11+
// RUN: chmod +x %t/amdgpu_arch_fail
12+
// RUN: chmod +x %t/amdgpu_arch_different
13+
// RUN: chmod +x %t/amdgpu_arch_empty
14+
15+
// case when amdgpu_arch returns nothing or fails
16+
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -nogpulib --amdgpu-arch-tool=%t/amdgpu_arch_fail %s 2>&1 \
17+
// RUN: | FileCheck %s --check-prefix=NO-OUTPUT-ERROR
18+
// NO-OUTPUT-ERROR: error: Cannot determine AMDGPU architecture{{.*}}Exited with error code 1. Consider passing it via --march
19+
20+
// case when amdgpu_arch returns multiple gpus but all are different
21+
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -nogpulib --amdgpu-arch-tool=%t/amdgpu_arch_different %s 2>&1 \
22+
// RUN: | FileCheck %s --check-prefix=MULTIPLE-OUTPUT-ERROR
23+
// MULTIPLE-OUTPUT-ERROR: error: Cannot determine AMDGPU architecture: Multiple AMD GPUs found with different archs. Consider passing it via --march
24+
25+
// case when amdgpu_arch does not return anything with successful execution
26+
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -nogpulib --amdgpu-arch-tool=%t/amdgpu_arch_empty %s 2>&1 \
27+
// RUN: | FileCheck %s --check-prefix=EMPTY-OUTPUT
28+
// EMPTY-OUTPUT: error: Cannot determine AMDGPU architecture: No AMD GPU detected in the system. Consider passing it via --march
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
// REQUIRES: system-linux
2+
// REQUIRES: x86-registered-target
3+
// REQUIRES: amdgpu-registered-target
4+
// REQUIRES: shell
5+
6+
// RUN: mkdir -p %t
7+
// RUN: rm -f %t/amdgpu_arch_gfx906
8+
// RUN: cp %S/Inputs/amdgpu-arch/amdgpu_arch_gfx906 %t/
9+
// RUN: cp %S/Inputs/amdgpu-arch/amdgpu_arch_gfx908_gfx908 %t/
10+
// RUN: chmod +x %t/amdgpu_arch_gfx906
11+
// RUN: chmod +x %t/amdgpu_arch_gfx908_gfx908
12+
13+
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -nogpulib --amdgpu-arch-tool=%t/amdgpu_arch_gfx906 %s 2>&1 \
14+
// RUN: | FileCheck %s
15+
// CHECK: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "[[GFX:gfx906]]"
16+
// CHECK: llvm-link{{.*}}"-o" "{{.*}}amdgpu-openmp-system-arch-{{.*}}-[[GFX]]-linked-{{.*}}.bc"
17+
// CHECK: llc{{.*}}amdgpu-openmp-system-arch-{{.*}}-[[GFX]]-linked-{{.*}}.bc" "-mtriple=amdgcn-amd-amdhsa" "-mcpu=[[GFX]]" "-filetype=obj" "-o"{{.*}}amdgpu-openmp-system-arch-{{.*}}-[[GFX]]-{{.*}}.o"
18+
19+
// case when amdgpu_arch returns multiple gpus but of same arch
20+
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -nogpulib --amdgpu-arch-tool=%t/amdgpu_arch_gfx908_gfx908 %s 2>&1 \
21+
// RUN: | FileCheck %s --check-prefix=CHECK-MULTIPLE
22+
// CHECK-MULTIPLE: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "[[GFX:gfx908]]"
23+
// CHECK-MULTIPLE: llvm-link{{.*}}"-o" "{{.*}}amdgpu-openmp-system-arch-{{.*}}-[[GFX]]-linked-{{.*}}.bc"
24+
// CHECK-MULTIPLE: llc{{.*}}amdgpu-openmp-system-arch-{{.*}}-[[GFX]]-linked-{{.*}}.bc" "-mtriple=amdgcn-amd-amdhsa" "-mcpu=[[GFX]]" "-filetype=obj" "-o"{{.*}}amdgpu-openmp-system-arch-{{.*}}-[[GFX]]-{{.*}}.o"

clang/tools/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,3 +43,5 @@ add_llvm_external_project(clang-tools-extra extra)
4343

4444
# libclang may require clang-tidy in clang-tools-extra.
4545
add_clang_subdirectory(libclang)
46+
47+
add_clang_subdirectory(amdgpu-arch)
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
//===- AMDGPUArch.cpp - list AMDGPU installed ----------*- C++ -*---------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This file implements a tool for detecting name of AMDGPU installed in system
10+
// using HSA. This tool is used by AMDGPU OpenMP driver.
11+
//
12+
//===----------------------------------------------------------------------===//
13+
14+
#include <hsa.h>
15+
#include <string>
16+
#include <vector>
17+
18+
/// Callback for hsa_iterate_agents that records the name of each GPU agent.
///
/// \param Agent the agent currently being visited.
/// \param Data pointer to the std::vector<std::string> collecting GPU names.
/// \returns the failing status when an agent query fails; otherwise
/// HSA_STATUS_SUCCESS, including for agents that are not GPUs.
static hsa_status_t iterateAgentsCallback(hsa_agent_t Agent, void *Data) {
  hsa_device_type_t Kind;
  hsa_status_t QueryStatus =
      hsa_agent_get_info(Agent, HSA_AGENT_INFO_DEVICE, &Kind);
  if (QueryStatus != HSA_STATUS_SUCCESS)
    return QueryStatus;

  // Continue only if the device type is GPU; other agents are skipped
  // without reporting an error.
  if (Kind != HSA_DEVICE_TYPE_GPU)
    return HSA_STATUS_SUCCESS;

  char Name[64];
  QueryStatus = hsa_agent_get_info(Agent, HSA_AGENT_INFO_NAME, Name);
  if (QueryStatus != HSA_STATUS_SUCCESS)
    return QueryStatus;

  auto *Names = static_cast<std::vector<std::string> *>(Data);
  Names->push_back(Name);
  return HSA_STATUS_SUCCESS;
}
38+
39+
int main() {
40+
hsa_status_t Status = hsa_init();
41+
if (Status != HSA_STATUS_SUCCESS) {
42+
return 1;
43+
}
44+
45+
std::vector<std::string> GPUs;
46+
Status = hsa_iterate_agents(iterateAgentsCallback, &GPUs);
47+
if (Status != HSA_STATUS_SUCCESS) {
48+
return 1;
49+
}
50+
51+
for (const auto &GPU : GPUs)
52+
printf("%s\n", GPU.c_str());
53+
54+
if (GPUs.size() < 1)
55+
return 1;
56+
57+
hsa_shut_down();
58+
return 0;
59+
}
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# //===----------------------------------------------------------------------===//
2+
# //
3+
# // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
# // See https://llvm.org/LICENSE.txt for details.
5+
# // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
# //
7+
# //===----------------------------------------------------------------------===//
8+
9+
# amdgpu-arch depends on the HSA runtime; skip the tool entirely when the
# runtime package cannot be found.
find_package(hsa-runtime64 1.2.0 QUIET HINTS ${CMAKE_INSTALL_PREFIX} PATHS /opt/rocm)
# Test the variable by name: expanding it with ${} would leave a malformed
# `if (NOT )` if the variable were ever unset.
if (NOT hsa-runtime64_FOUND)
  message(STATUS "Not building amdgpu-arch: hsa-runtime64 not found")
  return()
endif()

add_clang_tool(amdgpu-arch AMDGPUArch.cpp)

clang_target_link_libraries(amdgpu-arch PRIVATE hsa-runtime64::hsa-runtime64)

0 commit comments

Comments
 (0)