Skip to content

Commit 2ae2564

Browse files
committed
[CUDA][HIP] Add -Xarch_device and -Xarch_host options
The argument after -Xarch_device will be added to the arguments for CUDA/HIP device compilation and will be removed for host compilation. The argument after -Xarch_host will be added to the arguments for CUDA/HIP host compilation and will be removed for device compilation. Differential Revision: https://reviews.llvm.org/D76520
1 parent d381b6a commit 2ae2564

File tree

7 files changed

+105
-24
lines changed

7 files changed

+105
-24
lines changed

clang/include/clang/Driver/Options.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -466,6 +466,10 @@ def Xanalyzer : Separate<["-"], "Xanalyzer">,
466466
HelpText<"Pass <arg> to the static analyzer">, MetaVarName<"<arg>">,
467467
Group<StaticAnalyzer_Group>;
468468
def Xarch__ : JoinedAndSeparate<["-"], "Xarch_">, Flags<[DriverOption]>;
469+
def Xarch_host : Separate<["-"], "Xarch_host">, Flags<[DriverOption]>,
470+
HelpText<"Pass <arg> to the CUDA/HIP host compilation">, MetaVarName<"<arg>">;
471+
def Xarch_device : Separate<["-"], "Xarch_device">, Flags<[DriverOption]>,
472+
HelpText<"Pass <arg> to the CUDA/HIP device compilation">, MetaVarName<"<arg>">;
469473
def Xassembler : Separate<["-"], "Xassembler">,
470474
HelpText<"Pass <arg> to the assembler">, MetaVarName<"<arg>">,
471475
Group<CompileOnly_Group>;

clang/include/clang/Driver/ToolChain.h

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -296,10 +296,20 @@ class ToolChain {
296296
SmallVectorImpl<llvm::opt::Arg *> &AllocatedArgs) const;
297297

298298
/// Append the argument following \p A to \p DAL assuming \p A is an Xarch
299-
/// argument.
300-
virtual void TranslateXarchArgs(const llvm::opt::DerivedArgList &Args,
301-
llvm::opt::Arg *&A,
302-
llvm::opt::DerivedArgList *DAL) const;
299+
/// argument. If \p AllocatedArgs is a null pointer, synthesized arguments are
300+
/// added to \p DAL, otherwise they are appended to \p AllocatedArgs.
301+
virtual void TranslateXarchArgs(
302+
const llvm::opt::DerivedArgList &Args, llvm::opt::Arg *&A,
303+
llvm::opt::DerivedArgList *DAL,
304+
SmallVectorImpl<llvm::opt::Arg *> *AllocatedArgs = nullptr) const;
305+
306+
/// Translate -Xarch_ arguments. If there are no such arguments, return
307+
/// a null pointer, otherwise return a DerivedArgList containing the
308+
/// translated arguments.
309+
virtual llvm::opt::DerivedArgList *
310+
TranslateXarchArgs(const llvm::opt::DerivedArgList &Args, StringRef BoundArch,
311+
Action::OffloadKind DeviceOffloadKind,
312+
SmallVectorImpl<llvm::opt::Arg *> *AllocatedArgs) const;
303313

304314
/// Choose a tool to use to handle the action \p JA.
305315
///

clang/lib/Driver/Compilation.cpp

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -76,16 +76,29 @@ Compilation::getArgsForToolChain(const ToolChain *TC, StringRef BoundArch,
7676
*TranslatedArgs, SameTripleAsHost, AllocatedArgs);
7777
}
7878

79+
DerivedArgList *NewDAL = nullptr;
7980
if (!OpenMPArgs) {
81+
NewDAL = TC->TranslateXarchArgs(*TranslatedArgs, BoundArch,
82+
DeviceOffloadKind, &AllocatedArgs);
83+
} else {
84+
NewDAL = TC->TranslateXarchArgs(*OpenMPArgs, BoundArch, DeviceOffloadKind,
85+
&AllocatedArgs);
86+
if (!NewDAL)
87+
NewDAL = OpenMPArgs;
88+
else
89+
delete OpenMPArgs;
90+
}
91+
92+
if (!NewDAL) {
8093
Entry = TC->TranslateArgs(*TranslatedArgs, BoundArch, DeviceOffloadKind);
8194
if (!Entry)
8295
Entry = TranslatedArgs;
8396
} else {
84-
Entry = TC->TranslateArgs(*OpenMPArgs, BoundArch, DeviceOffloadKind);
97+
Entry = TC->TranslateArgs(*NewDAL, BoundArch, DeviceOffloadKind);
8598
if (!Entry)
86-
Entry = OpenMPArgs;
99+
Entry = NewDAL;
87100
else
88-
delete OpenMPArgs;
101+
delete NewDAL;
89102
}
90103

91104
// Add allocated arguments to the final DAL.

clang/lib/Driver/ToolChain.cpp

Lines changed: 58 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1103,11 +1103,20 @@ llvm::opt::DerivedArgList *ToolChain::TranslateOpenMPTargetArgs(
11031103
return nullptr;
11041104
}
11051105

1106-
void ToolChain::TranslateXarchArgs(const llvm::opt::DerivedArgList &Args,
1107-
llvm::opt::Arg *&A,
1108-
llvm::opt::DerivedArgList *DAL) const {
1106+
// TODO: Currently, arguments whose values are separated by a space, e.g.
1107+
// -Xclang -mframe-pointer=no cannot be passed by -Xarch_. This should be
1108+
// fixed.
1109+
void ToolChain::TranslateXarchArgs(
1110+
const llvm::opt::DerivedArgList &Args, llvm::opt::Arg *&A,
1111+
llvm::opt::DerivedArgList *DAL,
1112+
SmallVectorImpl<llvm::opt::Arg *> *AllocatedArgs) const {
11091113
const OptTable &Opts = getDriver().getOpts();
1110-
unsigned Index = Args.getBaseArgs().MakeIndex(A->getValue(1));
1114+
unsigned ValuePos = 1;
1115+
if (A->getOption().matches(options::OPT_Xarch_device) ||
1116+
A->getOption().matches(options::OPT_Xarch_host))
1117+
ValuePos = 0;
1118+
1119+
unsigned Index = Args.getBaseArgs().MakeIndex(A->getValue(ValuePos));
11111120
unsigned Prev = Index;
11121121
std::unique_ptr<llvm::opt::Arg> XarchArg(Opts.ParseOneArg(Args, Index));
11131122

@@ -1130,5 +1139,49 @@ void ToolChain::TranslateXarchArgs(const llvm::opt::DerivedArgList &Args,
11301139
}
11311140
XarchArg->setBaseArg(A);
11321141
A = XarchArg.release();
1133-
DAL->AddSynthesizedArg(A);
1142+
if (!AllocatedArgs)
1143+
DAL->AddSynthesizedArg(A);
1144+
else
1145+
AllocatedArgs->push_back(A);
1146+
}
1147+
1148+
/// Resolve -Xarch_device, -Xarch_host and (for CUDA/HIP offloading only)
/// -Xarch_<arch> options in \p Args for the compilation described by \p OFK
/// and \p BoundArch.
///
/// Each such option is either translated via the per-argument
/// TranslateXarchArgs overload (with \p AllocatedArgs forwarded to it) or
/// dropped; every other argument is copied through unchanged.
///
/// \returns a DerivedArgList allocated with `new` if at least one argument was
/// translated or dropped, otherwise a null pointer. Presumably the caller
/// takes ownership of a non-null result -- confirm against the call sites.
llvm::opt::DerivedArgList *ToolChain::TranslateXarchArgs(
1149+
const llvm::opt::DerivedArgList &Args, StringRef BoundArch,
1150+
Action::OffloadKind OFK,
1151+
SmallVectorImpl<llvm::opt::Arg *> *AllocatedArgs) const {
1152+
DerivedArgList *DAL = new DerivedArgList(Args.getBaseArgs());
1153+
bool Modified = false;
1154+
1155+
// Only the CUDA and HIP offload kinds are treated as device compilation here.
bool IsGPU = OFK == Action::OFK_Cuda || OFK == Action::OFK_HIP;
1156+
for (Arg *A : Args) {
1157+
bool NeedTrans = false;
1158+
bool Skip = false;
1159+
if (A->getOption().matches(options::OPT_Xarch_device)) {
1160+
// -Xarch_device: translate for CUDA/HIP compilation, drop otherwise.
NeedTrans = IsGPU;
1161+
Skip = !IsGPU;
1162+
} else if (A->getOption().matches(options::OPT_Xarch_host)) {
1163+
// -Xarch_host: translate when OFK is neither CUDA nor HIP, drop otherwise.
NeedTrans = !IsGPU;
1164+
Skip = IsGPU;
1165+
} else if (A->getOption().matches(options::OPT_Xarch__) && IsGPU) {
1166+
// Do not translate -Xarch_ options for non-CUDA/HIP toolchains since
1167+
// they may need special translation.
1168+
// Skip this argument unless the architecture matches BoundArch.
1169+
if (BoundArch.empty() || A->getValue(0) != BoundArch)
1170+
Skip = true;
1171+
else
1172+
NeedTrans = true;
1173+
}
1174+
// Both translating and dropping an argument make the result differ from Args.
if (NeedTrans || Skip)
1175+
Modified = true;
1176+
// The per-argument overload takes A by reference, so A can be replaced by
// the translated argument before it is appended below.
if (NeedTrans)
1177+
TranslateXarchArgs(Args, A, DAL, AllocatedArgs);
1178+
if (!Skip)
1179+
DAL->append(A);
1180+
}
1181+
1182+
if (Modified)
1183+
return DAL;
1184+
1185+
// Nothing was translated or dropped: discard the copy and return null.
delete DAL;
1186+
return nullptr;
11341187
}

clang/lib/Driver/ToolChains/Cuda.cpp

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -800,12 +800,6 @@ CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
800800
}
801801

802802
for (Arg *A : Args) {
803-
if (A->getOption().matches(options::OPT_Xarch__)) {
804-
// Skip this argument unless the architecture matches BoundArch
805-
if (BoundArch.empty() || A->getValue(0) != BoundArch)
806-
continue;
807-
TranslateXarchArgs(Args, A, DAL);
808-
}
809803
DAL->append(A);
810804
}
811805

clang/lib/Driver/ToolChains/HIP.cpp

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -378,12 +378,6 @@ HIPToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
378378
const OptTable &Opts = getDriver().getOpts();
379379

380380
for (Arg *A : Args) {
381-
if (A->getOption().matches(options::OPT_Xarch__)) {
382-
// Skip this argument unless the architecture matches BoundArch.
383-
if (BoundArch.empty() || A->getValue(0) != BoundArch)
384-
continue;
385-
TranslateXarchArgs(Args, A, DAL);
386-
}
387381
DAL->append(A);
388382
}
389383

clang/test/Driver/hip-options.hip

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,16 @@
1313
// RUN: -mllvm -amdgpu-early-inline-all=true %s 2>&1 | \
1414
// RUN: FileCheck -check-prefix=MLLVM %s
1515
// MLLVM-NOT: "-mllvm"{{.*}}"-amdgpu-early-inline-all=true"{{.*}}"-mllvm"{{.*}}"-amdgpu-early-inline-all=true"
16+
17+
// RUN: %clang -### -Xarch_device -g -nogpulib --cuda-gpu-arch=gfx900 \
18+
// RUN: -Xarch_device -fcf-protection=branch \
19+
// RUN: --cuda-gpu-arch=gfx906 %s 2>&1 | FileCheck -check-prefix=DEV %s
20+
// DEV: clang{{.*}} "-fcuda-is-device" {{.*}} "-debug-info-kind={{.*}}" {{.*}} "-fcf-protection=branch"
21+
// DEV: clang{{.*}} "-fcuda-is-device" {{.*}} "-debug-info-kind={{.*}}" {{.*}} "-fcf-protection=branch"
22+
// DEV-NOT: clang{{.*}} {{.*}} "-debug-info-kind={{.*}}"
23+
24+
// RUN: %clang -### -Xarch_host -g -nogpulib --cuda-gpu-arch=gfx900 \
25+
// RUN: --cuda-gpu-arch=gfx906 %s 2>&1 | FileCheck -check-prefix=HOST %s
26+
// HOST-NOT: clang{{.*}} "-fcuda-is-device" {{.*}} "-debug-info-kind={{.*}}"
27+
// HOST-NOT: clang{{.*}} "-fcuda-is-device" {{.*}} "-debug-info-kind={{.*}}"
28+
// HOST: clang{{.*}} "-debug-info-kind={{.*}}"

0 commit comments

Comments
 (0)