Skip to content

Commit eaa9ef0

Browse files
committed
[CUDA, FDO] Filter out profiling options from GPU-side compilations.
Differential Revision: https://reviews.llvm.org/D100598
1 parent b93629d commit eaa9ef0

File tree

2 files changed: 40 additions (+40) and 23 deletions (-23).

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 27 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -4159,7 +4159,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
41594159
// include as part of the module. All other jobs are expected to have exactly
41604160
// one input.
41614161
bool IsCuda = JA.isOffloading(Action::OFK_Cuda);
4162+
bool IsCudaDevice = JA.isDeviceOffloading(Action::OFK_Cuda);
41624163
bool IsHIP = JA.isOffloading(Action::OFK_HIP);
4164+
bool IsHIPDevice = JA.isDeviceOffloading(Action::OFK_HIP);
41634165
bool IsOpenMPDevice = JA.isDeviceOffloading(Action::OFK_OpenMP);
41644166
bool IsHeaderModulePrecompile = isa<HeaderModulePrecompileJobAction>(JA);
41654167

@@ -5003,8 +5005,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
50035005
// Prepare `-aux-target-cpu` and `-aux-target-feature` unless
50045006
// `--gpu-use-aux-triple-only` is specified.
50055007
if (!Args.getLastArg(options::OPT_gpu_use_aux_triple_only) &&
5006-
((IsCuda && JA.isDeviceOffloading(Action::OFK_Cuda)) ||
5007-
(IsHIP && JA.isDeviceOffloading(Action::OFK_HIP)))) {
5008+
(IsCudaDevice || IsHIPDevice)) {
50085009
const ArgList &HostArgs =
50095010
C.getArgsForToolChain(nullptr, StringRef(), Action::OFK_None);
50105011
std::string HostCPU =
@@ -5824,29 +5825,32 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
58245825
Args.MakeArgString(Twine("-fcf-protection=") + A->getValue()));
58255826
}
58265827

5827-
// Forward -f options with positive and negative forms; we translate
5828-
// these by hand.
5829-
if (Arg *A = getLastProfileSampleUseArg(Args)) {
5830-
auto *PGOArg = Args.getLastArg(
5831-
options::OPT_fprofile_generate, options::OPT_fprofile_generate_EQ,
5832-
options::OPT_fcs_profile_generate, options::OPT_fcs_profile_generate_EQ,
5833-
options::OPT_fprofile_use, options::OPT_fprofile_use_EQ);
5834-
if (PGOArg)
5835-
D.Diag(diag::err_drv_argument_not_allowed_with)
5836-
<< "SampleUse with PGO options";
5828+
// Forward -f options with positive and negative forms; we translate these by
5829+
// hand. Do not propagate PGO options to the GPU-side compilations as the
5830+
// profile info is for the host-side compilation only.
5831+
if (!(IsCudaDevice || IsHIPDevice)) {
5832+
if (Arg *A = getLastProfileSampleUseArg(Args)) {
5833+
auto *PGOArg = Args.getLastArg(
5834+
options::OPT_fprofile_generate, options::OPT_fprofile_generate_EQ,
5835+
options::OPT_fcs_profile_generate,
5836+
options::OPT_fcs_profile_generate_EQ, options::OPT_fprofile_use,
5837+
options::OPT_fprofile_use_EQ);
5838+
if (PGOArg)
5839+
D.Diag(diag::err_drv_argument_not_allowed_with)
5840+
<< "SampleUse with PGO options";
5841+
5842+
StringRef fname = A->getValue();
5843+
if (!llvm::sys::fs::exists(fname))
5844+
D.Diag(diag::err_drv_no_such_file) << fname;
5845+
else
5846+
A->render(Args, CmdArgs);
5847+
}
5848+
Args.AddLastArg(CmdArgs, options::OPT_fprofile_remapping_file_EQ);
58375849

5838-
StringRef fname = A->getValue();
5839-
if (!llvm::sys::fs::exists(fname))
5840-
D.Diag(diag::err_drv_no_such_file) << fname;
5841-
else
5842-
A->render(Args, CmdArgs);
5850+
if (Args.hasFlag(options::OPT_fpseudo_probe_for_profiling,
5851+
options::OPT_fno_pseudo_probe_for_profiling, false))
5852+
CmdArgs.push_back("-fpseudo-probe-for-profiling");
58435853
}
5844-
Args.AddLastArg(CmdArgs, options::OPT_fprofile_remapping_file_EQ);
5845-
5846-
if (Args.hasFlag(options::OPT_fpseudo_probe_for_profiling,
5847-
options::OPT_fno_pseudo_probe_for_profiling, false))
5848-
CmdArgs.push_back("-fpseudo-probe-for-profiling");
5849-
58505854
RenderBuiltinOptions(TC, RawTriple, Args, CmdArgs);
58515855

58525856
if (!Args.hasFlag(options::OPT_fassume_sane_operator_new,

clang/test/Driver/clang_f_opts.c

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,19 @@
5858
// RUN: %clang -### -S -fprofile-sample-use=%S/Inputs/file.prof %s 2>&1 | FileCheck -check-prefix=CHECK-SAMPLE-PROFILE %s
5959
// CHECK-SAMPLE-PROFILE: "-fprofile-sample-use={{.*}}/file.prof"
6060

61+
//
62+
// RUN: %clang -### -x cuda -nocudainc -nocudalib \
63+
// RUN: -c -fprofile-sample-use=%S/Inputs/file.prof %s 2>&1 \
64+
// RUN: | FileCheck -check-prefix=CHECK-CUDA-SAMPLE-PROFILE %s
65+
// -fprofile-sample-use should not be passed to the GPU compilation
66+
// CHECK-CUDA-SAMPLE-PROFILE: "-cc1"
67+
// CHECK-CUDA-SAMPLE-PROFILE-SAME: "-triple" "nvptx
68+
// CHECK-CUDA-SAMPLE-PROFILE-NOT: "-fprofile-sample-use={{.*}}/file.prof"
69+
// Host compilation should still have the option.
70+
// CHECK-CUDA-SAMPLE-PROFILE: "-cc1"
71+
// CHECK-CUDA-SAMPLE-PROFILE-SAME: "-fprofile-sample-use={{.*}}/file.prof"
72+
73+
6174
// RUN: %clang -### -S -fauto-profile=%S/Inputs/file.prof %s 2>&1 | FileCheck -check-prefix=CHECK-AUTO-PROFILE %s
6275
// CHECK-AUTO-PROFILE: "-fprofile-sample-use={{.*}}/file.prof"
6376

0 commit comments

Comments
 (0)