Skip to content

Commit 9a40858

Browse files
committed
[HIP][Clang][Driver] Add Driver support for hipstdpar
This patch adds the Driver changes needed for enabling HIP parallel algorithm offload on AMDGPU targets. What this change does can be summed up as follows:

- add two flags, one for enabling `hipstdpar` compilation, the second enabling the optional allocation interposition mode;
- the flags correspond to new LangOpt members;
- if we are compiling or linking with --hipstdpar, we enable HIP; in the compilation case C and C++ inputs are treated as HIP inputs;
- the ROCm / AMDGPU driver is augmented to look for and include an implementation detail forwarding header; we error out if the user requested `hipstdpar` but the header or its dependencies cannot be found.

Tests for the behaviour described above are also added.

Reviewed by: MaskRay, yaxunl

Differential Revision: https://reviews.llvm.org/D155775
1 parent 481df27 commit 9a40858

File tree

12 files changed

+141
-2
lines changed

12 files changed

+141
-2
lines changed

clang/include/clang/Basic/DiagnosticDriverKinds.td

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,16 @@ def err_drv_no_rocm_device_lib : Error<
7070
def err_drv_no_hip_runtime : Error<
7171
"cannot find HIP runtime; provide its path via '--rocm-path', or pass "
7272
"'-nogpuinc' to build without HIP runtime">;
73+
def err_drv_no_hipstdpar_lib : Error<
74+
"cannot find HIP Standard Parallelism Acceleration library; provide it via "
75+
"'--hipstdpar-path'">;
76+
def err_drv_no_hipstdpar_thrust_lib : Error<
77+
"cannot find rocThrust, which is required by the HIP Standard Parallelism "
78+
"Acceleration library; provide it via "
79+
"'--hipstdpar-thrust-path'">;
80+
def err_drv_no_hipstdpar_prim_lib : Error<
81+
"cannot find rocPrim, which is required by the HIP Standard Parallelism "
82+
"Acceleration library; provide it via '--hipstdpar-prim-path'">;
7383

7484
def err_drv_no_hipspv_device_lib : Error<
7585
"cannot find HIP device library%select{| for %1}0; provide its path via "

clang/include/clang/Basic/LangOptions.def

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,8 @@ ENUM_LANGOPT(SYCLVersion , SYCLMajorVersion, 2, SYCL_None, "Version of the SYCL
280280

281281
LANGOPT(HIPUseNewLaunchAPI, 1, 0, "Use new kernel launching API for HIP")
282282
LANGOPT(OffloadUniformBlock, 1, 0, "Assume that kernels are launched with uniform block sizes (default true for CUDA/HIP and false otherwise)")
283+
LANGOPT(HIPStdPar, 1, 0, "Enable Standard Parallel Algorithm Acceleration for HIP (experimental)")
284+
LANGOPT(HIPStdParInterposeAlloc, 1, 0, "Replace allocations / deallocations with HIP RT calls when Standard Parallel Algorithm Acceleration for HIP is enabled (Experimental)")
283285

284286
LANGOPT(SizedDeallocation , 1, 0, "sized deallocation")
285287
LANGOPT(AlignedAllocation , 1, 0, "aligned allocation")

clang/include/clang/Driver/Options.td

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1258,6 +1258,32 @@ def rocm_path_EQ : Joined<["--"], "rocm-path=">, Group<hip_Group>,
12581258
HelpText<"ROCm installation path, used for finding and automatically linking required bitcode libraries.">;
12591259
def hip_path_EQ : Joined<["--"], "hip-path=">, Group<hip_Group>,
12601260
HelpText<"HIP runtime installation path, used for finding HIP version and adding HIP include path.">;
1261+
def hipstdpar : Flag<["--"], "hipstdpar">,
1262+
Visibility<[ClangOption, CC1Option]>,
1263+
Group<CompileOnly_Group>,
1264+
HelpText<"Enable HIP acceleration for standard parallel algorithms">,
1265+
MarshallingInfoFlag<LangOpts<"HIPStdPar">>;
1266+
def hipstdpar_interpose_alloc : Flag<["--"], "hipstdpar-interpose-alloc">,
1267+
Visibility<[ClangOption, CC1Option]>,
1268+
Group<CompileOnly_Group>,
1269+
HelpText<"Replace all memory allocation / deallocation calls with "
1270+
"hipManagedMalloc / hipFree equivalents">,
1271+
MarshallingInfoFlag<LangOpts<"HIPStdParInterposeAlloc">>;
1272+
// TODO: use MarshallingInfo here
1273+
def hipstdpar_path_EQ : Joined<["--"], "hipstdpar-path=">, Group<i_Group>,
1274+
HelpText<
1275+
"HIP Standard Parallel Algorithm Acceleration library path, used for "
1276+
"finding and implicitly including the library header">;
1277+
def hipstdpar_thrust_path_EQ : Joined<["--"], "hipstdpar-thrust-path=">,
1278+
Group<i_Group>,
1279+
HelpText<
1280+
"rocThrust path, required by the HIP Standard Parallel Algorithm "
1281+
"Acceleration library, used to implicitly include the rocThrust library">;
1282+
def hipstdpar_prim_path_EQ : Joined<["--"], "hipstdpar-prim-path=">,
1283+
Group<i_Group>,
1284+
HelpText<
1285+
"rocPrim path, required by the HIP Standard Parallel Algorithm "
1286+
"Acceleration library, used to implicitly include the rocPrim library">;
12611287
def rocm_device_lib_path_EQ : Joined<["--"], "rocm-device-lib-path=">, Group<hip_Group>,
12621288
HelpText<"ROCm device library path. Alternative to rocm-path.">;
12631289
def : Joined<["--"], "hip-device-lib-path=">, Alias<rocm_device_lib_path_EQ>;

clang/lib/Driver/Driver.cpp

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -767,7 +767,8 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
767767
[](std::pair<types::ID, const llvm::opt::Arg *> &I) {
768768
return types::isHIP(I.first);
769769
}) ||
770-
C.getInputArgs().hasArg(options::OPT_hip_link);
770+
C.getInputArgs().hasArg(options::OPT_hip_link) ||
771+
C.getInputArgs().hasArg(options::OPT_hipstdpar);
771772
if (IsCuda && IsHIP) {
772773
Diag(clang::diag::err_drv_mix_cuda_hip);
773774
return;
@@ -2705,6 +2706,10 @@ void Driver::BuildInputs(const ToolChain &TC, DerivedArgList &Args,
27052706
}
27062707
}
27072708

2709+
if ((Ty == types::TY_C || Ty == types::TY_CXX) &&
2710+
Args.hasArgNoClaim(options::OPT_hipstdpar))
2711+
Ty = types::TY_HIP;
2712+
27082713
if (DiagnoseInputExistence(Args, Value, Ty, /*TypoCorrect=*/true))
27092714
Inputs.push_back(std::make_pair(Ty, A));
27102715

@@ -3915,6 +3920,11 @@ void Driver::handleArguments(Compilation &C, DerivedArgList &Args,
39153920
phases::ID FinalPhase = getFinalPhase(Args, &FinalPhaseArg);
39163921

39173922
if (FinalPhase == phases::Link) {
3923+
if (Args.hasArgNoClaim(options::OPT_hipstdpar)) {
3924+
Args.AddFlagArg(nullptr, getOpts().getOption(options::OPT_hip_link));
3925+
Args.AddFlagArg(nullptr,
3926+
getOpts().getOption(options::OPT_frtlib_add_rpath));
3927+
}
39183928
// Emitting LLVM while linking disabled except in HIPAMD Toolchain
39193929
if (Args.hasArg(options::OPT_emit_llvm) && !Args.hasArg(options::OPT_hip_link))
39203930
Diag(clang::diag::err_drv_emit_llvm_link);

clang/lib/Driver/ToolChains/AMDGPU.cpp

Lines changed: 55 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -329,6 +329,20 @@ RocmInstallationDetector::RocmInstallationDetector(
329329
RocmDeviceLibPathArg =
330330
Args.getAllArgValues(clang::driver::options::OPT_rocm_device_lib_path_EQ);
331331
HIPPathArg = Args.getLastArgValue(clang::driver::options::OPT_hip_path_EQ);
332+
HIPStdParPathArg =
333+
Args.getLastArgValue(clang::driver::options::OPT_hipstdpar_path_EQ);
334+
HasHIPStdParLibrary =
335+
!HIPStdParPathArg.empty() && D.getVFS().exists(HIPStdParPathArg +
336+
"/hipstdpar_lib.hpp");
337+
HIPRocThrustPathArg =
338+
Args.getLastArgValue(clang::driver::options::OPT_hipstdpar_thrust_path_EQ);
339+
HasRocThrustLibrary = !HIPRocThrustPathArg.empty() &&
340+
D.getVFS().exists(HIPRocThrustPathArg + "/thrust");
341+
HIPRocPrimPathArg =
342+
Args.getLastArgValue(clang::driver::options::OPT_hipstdpar_prim_path_EQ);
343+
HasRocPrimLibrary = !HIPRocPrimPathArg.empty() &&
344+
D.getVFS().exists(HIPRocPrimPathArg + "/rocprim");
345+
332346
if (auto *A = Args.getLastArg(clang::driver::options::OPT_hip_version_EQ)) {
333347
HIPVersionArg = A->getValue();
334348
unsigned Major = ~0U;
@@ -507,6 +521,7 @@ void RocmInstallationDetector::AddHIPIncludeArgs(const ArgList &DriverArgs,
507521
ArgStringList &CC1Args) const {
508522
bool UsesRuntimeWrapper = VersionMajorMinor > llvm::VersionTuple(3, 5) &&
509523
!DriverArgs.hasArg(options::OPT_nohipwrapperinc);
524+
bool HasHipStdPar = DriverArgs.hasArg(options::OPT_hipstdpar);
510525

511526
if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) {
512527
// HIP header includes standard library wrapper headers under clang
@@ -529,8 +544,45 @@ void RocmInstallationDetector::AddHIPIncludeArgs(const ArgList &DriverArgs,
529544
CC1Args.push_back(DriverArgs.MakeArgString(P));
530545
}
531546

532-
if (DriverArgs.hasArg(options::OPT_nogpuinc))
547+
const auto HandleHipStdPar = [=, &DriverArgs, &CC1Args]() {
548+
if (!hasHIPStdParLibrary()) {
549+
D.Diag(diag::err_drv_no_hipstdpar_lib);
550+
return;
551+
}
552+
if (!HasRocThrustLibrary &&
553+
!D.getVFS().exists(getIncludePath() + "/thrust")) {
554+
D.Diag(diag::err_drv_no_hipstdpar_thrust_lib);
555+
return;
556+
}
557+
if (!HasRocPrimLibrary &&
558+
!D.getVFS().exists(getIncludePath() + "/rocprim")) {
559+
D.Diag(diag::err_drv_no_hipstdpar_prim_lib);
560+
return;
561+
}
562+
563+
const char *ThrustPath;
564+
if (HasRocThrustLibrary)
565+
ThrustPath = DriverArgs.MakeArgString(HIPRocThrustPathArg);
566+
else
567+
ThrustPath = DriverArgs.MakeArgString(getIncludePath() + "/thrust");
568+
569+
const char *PrimPath;
570+
if (HasRocPrimLibrary)
571+
PrimPath = DriverArgs.MakeArgString(HIPRocPrimPathArg);
572+
else
573+
PrimPath = DriverArgs.MakeArgString(getIncludePath() + "/rocprim");
574+
575+
CC1Args.append({"-idirafter", ThrustPath, "-idirafter", PrimPath,
576+
"-idirafter", DriverArgs.MakeArgString(HIPStdParPathArg),
577+
"-include", "hipstdpar_lib.hpp"});
578+
};
579+
580+
if (DriverArgs.hasArg(options::OPT_nogpuinc)) {
581+
if (HasHipStdPar)
582+
HandleHipStdPar();
583+
533584
return;
585+
}
534586

535587
if (!hasHIPRuntime()) {
536588
D.Diag(diag::err_drv_no_hip_runtime);
@@ -541,6 +593,8 @@ void RocmInstallationDetector::AddHIPIncludeArgs(const ArgList &DriverArgs,
541593
CC1Args.push_back(DriverArgs.MakeArgString(getIncludePath()));
542594
if (UsesRuntimeWrapper)
543595
CC1Args.append({"-include", "__clang_hip_runtime_wrapper.h"});
596+
if (HasHipStdPar)
597+
HandleHipStdPar();
544598
}
545599

546600
void amdgpu::Linker::ConstructJob(Compilation &C, const JobAction &JA,

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6580,6 +6580,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
65806580
CmdArgs.push_back("-fhip-new-launch-api");
65816581
Args.addOptInFlag(CmdArgs, options::OPT_fgpu_allow_device_init,
65826582
options::OPT_fno_gpu_allow_device_init);
6583+
Args.AddLastArg(CmdArgs, options::OPT_hipstdpar);
6584+
Args.AddLastArg(CmdArgs, options::OPT_hipstdpar_interpose_alloc);
65836585
Args.addOptInFlag(CmdArgs, options::OPT_fhip_kernel_arg_name,
65846586
options::OPT_fno_hip_kernel_arg_name);
65856587
}

clang/lib/Driver/ToolChains/HIPAMD.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,8 @@ void AMDGCN::Linker::constructLldCommand(Compilation &C, const JobAction &JA,
113113
"--no-undefined",
114114
"-shared",
115115
"-plugin-opt=-amdgpu-internalize-symbols"};
116+
if (Args.hasArg(options::OPT_hipstdpar))
117+
LldArgs.push_back("-plugin-opt=-amdgpu-enable-hipstdpar");
116118

117119
auto &TC = getToolChain();
118120
auto &D = TC.getDriver();
@@ -242,6 +244,8 @@ void HIPAMDToolChain::addClangTargetOptions(
242244
if (!DriverArgs.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc,
243245
false))
244246
CC1Args.append({"-mllvm", "-amdgpu-internalize-symbols"});
247+
if (DriverArgs.hasArgNoClaim(options::OPT_hipstdpar))
248+
CC1Args.append({"-mllvm", "-amdgpu-enable-hipstdpar"});
245249

246250
StringRef MaxThreadsPerBlock =
247251
DriverArgs.getLastArgValue(options::OPT_gpu_max_threads_per_block_EQ);

clang/lib/Driver/ToolChains/ROCm.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,9 @@ class RocmInstallationDetector {
7777
const Driver &D;
7878
bool HasHIPRuntime = false;
7979
bool HasDeviceLibrary = false;
80+
bool HasHIPStdParLibrary = false;
81+
bool HasRocThrustLibrary = false;
82+
bool HasRocPrimLibrary = false;
8083

8184
// Default version if not detected or specified.
8285
const unsigned DefaultVersionMajor = 3;
@@ -96,6 +99,13 @@ class RocmInstallationDetector {
9699
std::vector<std::string> RocmDeviceLibPathArg;
97100
// HIP runtime path specified by --hip-path.
98101
StringRef HIPPathArg;
102+
// HIP Standard Parallel Algorithm acceleration library specified by
103+
// --hipstdpar-path
104+
StringRef HIPStdParPathArg;
105+
// rocThrust algorithm library specified by --hipstdpar-thrust-path
106+
StringRef HIPRocThrustPathArg;
107+
// rocPrim algorithm library specified by --hipstdpar-prim-path
108+
StringRef HIPRocPrimPathArg;
99109
// HIP version specified by --hip-version.
100110
StringRef HIPVersionArg;
101111
// Whether -nogpulib is specified.
@@ -180,6 +190,9 @@ class RocmInstallationDetector {
180190
/// Check whether we detected a valid ROCm device library.
181191
bool hasDeviceLibrary() const { return HasDeviceLibrary; }
182192

193+
/// Check whether we detected a valid HIP STDPAR Acceleration library.
194+
bool hasHIPStdParLibrary() const { return HasHIPStdParLibrary; }
195+
183196
/// Print information about the detected ROCm installation.
184197
void print(raw_ostream &OS) const;
185198

clang/test/Driver/Inputs/hipstdpar/hipstdpar_lib.hpp

Whitespace-only changes.

clang/test/Driver/Inputs/hipstdpar/rocprim/.keep

Whitespace-only changes.

clang/test/Driver/Inputs/hipstdpar/thrust/.keep

Whitespace-only changes.

clang/test/Driver/hipstdpar.c

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
// RUN: not %clang -### --hipstdpar -nogpulib -nogpuinc --compile %s 2>&1 | \
2+
// RUN: FileCheck --check-prefix=HIPSTDPAR-MISSING-LIB %s
3+
// RUN: %clang -### --hipstdpar --hipstdpar-path=%S/Inputs/hipstdpar \
4+
// RUN: --hipstdpar-thrust-path=%S/Inputs/hipstdpar/thrust \
5+
// RUN: --hipstdpar-prim-path=%S/Inputs/hipstdpar/rocprim \
6+
// RUN: -nogpulib -nogpuinc --compile %s 2>&1 | \
7+
// RUN: FileCheck --check-prefix=HIPSTDPAR-COMPILE %s
8+
// RUN: touch %t.o
9+
// RUN: %clang -### --hipstdpar %t.o 2>&1 | FileCheck --check-prefix=HIPSTDPAR-LINK %s
10+
11+
// HIPSTDPAR-MISSING-LIB: error: cannot find HIP Standard Parallelism Acceleration library; provide it via '--hipstdpar-path'
12+
// HIPSTDPAR-COMPILE: "-x" "hip"
13+
// HIPSTDPAR-COMPILE: "-idirafter" "{{.*/thrust}}"
14+
// HIPSTDPAR-COMPILE: "-idirafter" "{{.*/rocprim}}"
15+
// HIPSTDPAR-COMPILE: "-idirafter" "{{.*/Inputs/hipstdpar}}"
16+
// HIPSTDPAR-COMPILE: "-include" "hipstdpar_lib.hpp"
17+
// HIPSTDPAR-LINK: "-rpath"
18+
// HIPSTDPAR-LINK: "-l{{.*hip.*}}"

0 commit comments

Comments
 (0)