Skip to content

Commit d63efae

Browse files
committed
[Clang] Handle -flto-partitions generically and forward it properly
Summary: The llvm#128509 patch introduced `--flto-partitions`. It was marked as a HIP-only argument, and it was also spelled and handled incorrectly for an `-f` option. This patch makes the handling generic for `ld.lld` consumers. It also fixes some issues with the flags being emitted after the default arguments, which prevented users from overriding them. Finally, it forwards the option properly for the new driver so that this can be tested.
1 parent d0aa1f9 commit d63efae

File tree

9 files changed

+50
-54
lines changed

9 files changed

+50
-54
lines changed

clang/include/clang/Driver/Options.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1392,8 +1392,6 @@ def fhip_emit_relocatable : Flag<["-"], "fhip-emit-relocatable">,
13921392
HelpText<"Compile HIP source to relocatable">;
13931393
def fno_hip_emit_relocatable : Flag<["-"], "fno-hip-emit-relocatable">,
13941394
HelpText<"Do not override toolchain to compile HIP source to relocatable">;
1395-
def flto_partitions_EQ : Joined<["--"], "flto-partitions=">, Group<hip_Group>,
1396-
HelpText<"Number of partitions to use for parallel full LTO codegen. Use 1 to disable partitioning.">;
13971395
}
13981396

13991397
// Clang specific/exclusive options for OpenACC.
@@ -3043,6 +3041,8 @@ defm fat_lto_objects : BoolFOption<"fat-lto-objects",
30433041
PosFlag<SetTrue, [], [ClangOption, CC1Option], "Enable">,
30443042
NegFlag<SetFalse, [], [ClangOption, CC1Option], "Disable">,
30453043
BothFlags<[], [ClangOption, CC1Option], " fat LTO object support">>;
3044+
def flto_partitions_EQ : Joined<["-"], "flto-partitions=">, Group<f_Group>,
3045+
HelpText<"Number of partitions to use for parallel full LTO codegen, ld.lld only.">;
30463046
def fmacro_backtrace_limit_EQ : Joined<["-"], "fmacro-backtrace-limit=">,
30473047
Group<f_Group>, Visibility<[ClangOption, CC1Option, CLOption]>,
30483048
HelpText<"Set the maximum number of entries to print in a macro expansion backtrace (0 = no limit)">,

clang/lib/Driver/ToolChains/AMDGPU.cpp

Lines changed: 4 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -625,22 +625,19 @@ void amdgpu::Linker::ConstructJob(Compilation &C, const JobAction &JA,
625625
CmdArgs.push_back("-shared");
626626
}
627627

628-
addLinkerCompressDebugSectionsOption(getToolChain(), Args, CmdArgs);
629-
Args.AddAllArgs(CmdArgs, options::OPT_L);
630-
getToolChain().AddFilePathLibArgs(Args, CmdArgs);
631-
AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA);
632628
if (C.getDriver().isUsingLTO()) {
633629
const bool ThinLTO = (C.getDriver().getLTOMode() == LTOK_Thin);
634630
addLTOOptions(getToolChain(), Args, CmdArgs, Output, Inputs[0], ThinLTO);
635-
636-
if (!ThinLTO && JA.getOffloadingDeviceKind() == Action::OFK_HIP)
637-
addFullLTOPartitionOption(C.getDriver(), Args, CmdArgs);
638631
} else if (Args.hasArg(options::OPT_mcpu_EQ)) {
639632
CmdArgs.push_back(Args.MakeArgString(
640633
"-plugin-opt=mcpu=" +
641634
getProcessorFromTargetID(getToolChain().getTriple(),
642635
Args.getLastArgValue(options::OPT_mcpu_EQ))));
643636
}
637+
addLinkerCompressDebugSectionsOption(getToolChain(), Args, CmdArgs);
638+
getToolChain().AddFilePathLibArgs(Args, CmdArgs);
639+
Args.AddAllArgs(CmdArgs, options::OPT_L);
640+
AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA);
644641

645642
// Always pass the target-id features to the LTO job.
646643
std::vector<StringRef> Features;
@@ -711,26 +708,6 @@ void amdgpu::getAMDGPUTargetFeatures(const Driver &D,
711708
options::OPT_m_amdgpu_Features_Group);
712709
}
713710

714-
static unsigned getFullLTOPartitions(const Driver &D, const ArgList &Args) {
715-
int Value = 0;
716-
StringRef A = Args.getLastArgValue(options::OPT_flto_partitions_EQ, "8");
717-
if (A.getAsInteger(10, Value) || (Value < 1)) {
718-
Arg *Arg = Args.getLastArg(options::OPT_flto_partitions_EQ);
719-
D.Diag(diag::err_drv_invalid_int_value)
720-
<< Arg->getAsString(Args) << Arg->getValue();
721-
return 1;
722-
}
723-
724-
return Value;
725-
}
726-
727-
void amdgpu::addFullLTOPartitionOption(const Driver &D,
728-
const llvm::opt::ArgList &Args,
729-
llvm::opt::ArgStringList &CmdArgs) {
730-
CmdArgs.push_back(Args.MakeArgString("--lto-partitions=" +
731-
Twine(getFullLTOPartitions(D, Args))));
732-
}
733-
734711
/// AMDGPU Toolchain
735712
AMDGPUToolChain::AMDGPUToolChain(const Driver &D, const llvm::Triple &Triple,
736713
const ArgList &Args)

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9217,6 +9217,7 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA,
92179217
OPT_load,
92189218
OPT_fno_lto,
92199219
OPT_flto,
9220+
OPT_flto_partitions_EQ,
92209221
OPT_flto_EQ};
92219222
const llvm::DenseSet<unsigned> LinkerOptions{OPT_mllvm, OPT_Zlinker_input};
92229223
auto ShouldForward = [&](const llvm::DenseSet<unsigned> &Set, Arg *A) {
@@ -9226,7 +9227,8 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA,
92269227
};
92279228

92289229
ArgStringList CmdArgs;
9229-
for (Action::OffloadKind Kind : {Action::OFK_Cuda, Action::OFK_OpenMP}) {
9230+
for (Action::OffloadKind Kind : {Action::OFK_Cuda, Action::OFK_OpenMP,
9231+
Action::OFK_HIP, Action::OFK_SYCL}) {
92309232
auto TCRange = C.getOffloadToolChains(Kind);
92319233
for (auto &I : llvm::make_range(TCRange)) {
92329234
const ToolChain *TC = I.second;

clang/lib/Driver/ToolChains/CommonArgs.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -899,6 +899,17 @@ void tools::addLTOOptions(const ToolChain &ToolChain, const ArgList &Args,
899899
// files
900900
if (IsFatLTO)
901901
CmdArgs.push_back("--fat-lto-objects");
902+
903+
if (Args.hasArg(options::OPT_flto_partitions_EQ)) {
904+
int Value = 0;
905+
StringRef A = Args.getLastArgValue(options::OPT_flto_partitions_EQ, "8");
906+
if (A.getAsInteger(10, Value) || (Value < 1)) {
907+
Arg *Arg = Args.getLastArg(options::OPT_flto_partitions_EQ);
908+
D.Diag(diag::err_drv_invalid_int_value)
909+
<< Arg->getAsString(Args) << Arg->getValue();
910+
}
911+
CmdArgs.push_back(Args.MakeArgString("--lto-partitions=" + A));
912+
}
902913
}
903914

904915
const char *PluginOptPrefix = IsOSAIX ? "-bplugin_opt:" : "-plugin-opt=";

clang/lib/Driver/ToolChains/HIPAMD.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -116,8 +116,6 @@ void AMDGCN::Linker::constructLldCommand(Compilation &C, const JobAction &JA,
116116

117117
addLinkerCompressDebugSectionsOption(TC, Args, LldArgs);
118118

119-
amdgpu::addFullLTOPartitionOption(D, Args, LldArgs);
120-
121119
// Given that host and device linking happen in separate processes, the device
122120
// linker doesn't always have the visibility as to which device symbols are
123121
// needed by a program, especially for the device symbol dependencies that are
@@ -294,6 +292,10 @@ HIPAMDToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
294292
checkTargetID(*DAL);
295293
}
296294

295+
if (!Args.hasArg(options::OPT_flto_partitions_EQ))
296+
DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_flto_partitions_EQ),
297+
"8");
298+
297299
return DAL;
298300
}
299301

clang/test/Driver/amdgpu-toolchain.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,12 @@
2020

2121
// RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx90a:xnack+:sramecc- -nogpulib \
2222
// RUN: -L. -flto -fconvergent-functions %s 2>&1 | FileCheck -check-prefix=LTO %s
23-
// LTO: clang{{.*}} "-flto=full"{{.*}}"-fconvergent-functions"
24-
// LTO: ld.lld{{.*}}"-L."{{.*}}"-plugin-opt=mcpu=gfx90a"{{.*}}"-plugin-opt=-mattr=-sramecc,+xnack"
23+
// LTO: clang{{.*}}"-flto=full"{{.*}}"-fconvergent-functions"
24+
// LTO: ld.lld{{.*}}"-plugin-opt=mcpu=gfx90a"{{.*}}"-plugin-opt=-mattr=-sramecc,+xnack"{{.*}}
2525

2626
// RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx90a:xnack+:sramecc- -nogpulib \
2727
// RUN: -L. -fconvergent-functions %s 2>&1 | FileCheck -check-prefix=MCPU %s
28-
// MCPU: ld.lld{{.*}}"-L."{{.*}}"-plugin-opt=mcpu=gfx90a"{{.*}}"-plugin-opt=-mattr=-sramecc,+xnack"
28+
// MCPU: ld.lld{{.*}}"-plugin-opt=mcpu=gfx90a"{{.*}}"-plugin-opt=-mattr=-sramecc,+xnack"{{.*}}
2929

3030
// RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx906 -nogpulib \
3131
// RUN: -fuse-ld=ld %s 2>&1 | FileCheck -check-prefixes=LD %s
Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
// RUN: %clang -### --target=x86_64-linux-gnu \
2-
// RUN: -x hip --cuda-gpu-arch=gfx803 --flto-partitions=42 \
2+
// RUN: -x hip --cuda-gpu-arch=gfx803 -flto-partitions=42 \
33
// RUN: --no-offload-new-driver --emit-static-lib -nogpulib \
44
// RUN: -fuse-ld=lld -B%S/Inputs/lld -fgpu-rdc -nogpuinc \
55
// RUN: %S/Inputs/hip_multiple_inputs/a.cu \
@@ -10,26 +10,26 @@
1010
// FIXED-PARTS-NOT: ".*opt"
1111
// FIXED-PARTS-NOT: ".*llc"
1212
// FIXED-PARTS: [[LLD: ".*lld.*"]] {{.*}} "-plugin-opt=-amdgpu-internalize-symbols"
13-
// FIXED-PARTS-SAME: "-plugin-opt=mcpu=gfx803"
1413
// FIXED-PARTS-SAME: "--lto-partitions=42"
14+
// FIXED-PARTS-SAME: "-plugin-opt=mcpu=gfx803"
1515
// FIXED-PARTS-SAME: "-o" "{{.*out}}" "{{.*bc}}"
1616

1717
// RUN: not %clang -### --target=x86_64-linux-gnu \
18-
// RUN: -x hip --cuda-gpu-arch=gfx803 --flto-partitions=a \
18+
// RUN: -x hip --cuda-gpu-arch=gfx803 -flto-partitions=a \
1919
// RUN: --no-offload-new-driver --emit-static-lib -nogpulib \
2020
// RUN: -fuse-ld=lld -B%S/Inputs/lld -fgpu-rdc -nogpuinc \
2121
// RUN: %S/Inputs/hip_multiple_inputs/a.cu \
2222
// RUN: %S/Inputs/hip_multiple_inputs/b.hip \
2323
// RUN: 2>&1 | FileCheck %s --check-prefix=LTO_PARTS_INV0
2424

25-
// LTO_PARTS_INV0: clang: error: invalid integral value 'a' in '--flto-partitions=a'
25+
// LTO_PARTS_INV0: clang: error: invalid integral value 'a' in '-flto-partitions=a'
2626

2727
// RUN: not %clang -### --target=x86_64-linux-gnu \
28-
// RUN: -x hip --cuda-gpu-arch=gfx803 --flto-partitions=0 \
28+
// RUN: -x hip --cuda-gpu-arch=gfx803 -flto-partitions=0 \
2929
// RUN: --no-offload-new-driver --emit-static-lib -nogpulib \
3030
// RUN: -fuse-ld=lld -B%S/Inputs/lld -fgpu-rdc -nogpuinc \
3131
// RUN: %S/Inputs/hip_multiple_inputs/a.cu \
3232
// RUN: %S/Inputs/hip_multiple_inputs/b.hip \
3333
// RUN: 2>&1 | FileCheck %s --check-prefix=LTO_PARTS_INV1
3434

35-
// LTO_PARTS_INV1: clang: error: invalid integral value '0' in '--flto-partitions=0'
35+
// LTO_PARTS_INV1: clang: error: invalid integral value '0' in '-flto-partitions=0'

clang/test/Driver/hip-toolchain-rdc-static-lib.hip

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,8 +48,8 @@
4848
// CHECK-NOT: ".*opt"
4949
// CHECK-NOT: ".*llc"
5050
// CHECK: [[LLD: ".*lld.*"]] {{.*}} "-plugin-opt=-amdgpu-internalize-symbols"
51-
// CHECK-SAME: "-plugin-opt=mcpu=gfx803"
5251
// CHECK-SAME: "--lto-partitions={{[0-9]+}}"
52+
// CHECK-SAME: "-plugin-opt=mcpu=gfx803"
5353
// CHECK-SAME: "-o" "[[IMG_DEV1:.*out]]" [[A_BC1]] [[B_BC1]]
5454

5555
// generate image for device side path on gfx900
@@ -77,8 +77,8 @@
7777
// CHECK-NOT: ".*opt"
7878
// CHECK-NOT: ".*llc"
7979
// CHECK: [[LLD]] {{.*}} "-plugin-opt=-amdgpu-internalize-symbols"
80-
// CHECK-SAME: "-plugin-opt=mcpu=gfx900"
8180
// CHECK-SAME: "--lto-partitions={{[0-9]+}}"
81+
// CHECK-SAME: "-plugin-opt=mcpu=gfx900"
8282
// CHECK-SAME: "--whole-archive"
8383
// CHECK-SAME: "-o" "[[IMG_DEV2:.*out]]" [[A_BC2]] [[B_BC2]]
8484
// CHECK-SAME: "--no-whole-archive"

clang/test/Driver/hip-toolchain-rdc.hip

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -146,8 +146,8 @@
146146
// CHECK-NOT: ".*opt"
147147
// CHECK-NOT: ".*llc"
148148
// CHECK: {{".*lld.*"}} {{.*}} "-plugin-opt=-amdgpu-internalize-symbols"
149-
// CHECK-SAME: "-plugin-opt=mcpu=gfx900"
150149
// CHECK-SAME: "--lto-partitions={{[0-9]+}}"
150+
// CHECK-SAME: "-plugin-opt=mcpu=gfx900"
151151
// CHECK-SAME: "-o" "[[IMG_DEV2:.*.out]]" [[A_BC2]] [[B_BC2]]
152152

153153
// combine images generated into hip fat binary object
@@ -162,20 +162,24 @@
162162
// LNX: [[LD:".*ld.*"]] {{.*}}"-o" "a.out" {{.*}} [[A_OBJ_HOST]] [[B_OBJ_HOST]] [[OBJBUNDLE]]
163163
// MSVC: [[LD:".*lld-link.*"]] {{.*}}"-out:a.exe" {{.*}} [[A_OBJ_HOST]] [[B_OBJ_HOST]] [[OBJBUNDLE]]
164164

165-
// Check --flto-partitions
165+
// Check -flto-partitions
166166

167-
// RUN: %clang -### -fgpu-rdc --offload-arch=gfx90a -nogpulib -nogpuinc \
167+
// RUN: %clang -### -fgpu-rdc --offload-arch=gfx90a -nogpulib -nogpuinc --no-offload-new-driver \
168168
// RUN: -L. -foffload-lto %s 2>&1 | FileCheck -check-prefix=LTO_DEFAULT %s
169169
// LTO_DEFAULT: lld{{.*}}"--lto-partitions=8"
170170

171-
// RUN: %clang -### -fgpu-rdc --offload-arch=gfx90a -nogpulib -nogpuinc \
172-
// RUN: -L. -foffload-lto --flto-partitions=42 %s 2>&1 | FileCheck -check-prefix=LTO_PARTS %s
171+
// RUN: %clang -### -fgpu-rdc --offload-arch=gfx90a -nogpulib -nogpuinc --offload-new-driver \
172+
// RUN: -L. -foffload-lto %s 2>&1 | FileCheck -check-prefix=LTO_DEFAULT_NEW %s
173+
// LTO_DEFAULT_NEW: clang-linker-wrapper{{.*}}"--device-compiler=amdgcn-amd-amdhsa=-flto-partitions=8"
174+
175+
// RUN: %clang -### -fgpu-rdc --offload-arch=gfx90a -nogpulib -nogpuinc --no-offload-new-driver \
176+
// RUN: -L. -foffload-lto -flto-partitions=42 %s 2>&1 | FileCheck -check-prefix=LTO_PARTS %s
173177
// LTO_PARTS: lld{{.*}}"--lto-partitions=42"
174178

175-
// RUN: not %clang -### -fgpu-rdc --offload-arch=gfx90a -nogpulib -nogpuinc \
176-
// RUN: -L. -foffload-lto --flto-partitions=a %s 2>&1 | FileCheck -check-prefix=LTO_PARTS_INV0 %s
177-
// LTO_PARTS_INV0: clang: error: invalid integral value 'a' in '--flto-partitions=a'
179+
// RUN: not %clang -### -fgpu-rdc --offload-arch=gfx90a -nogpulib -nogpuinc --no-offload-new-driver \
180+
// RUN: -L. -foffload-lto -flto-partitions=a %s 2>&1 | FileCheck -check-prefix=LTO_PARTS_INV0 %s
181+
// LTO_PARTS_INV0: clang: error: invalid integral value 'a' in '-flto-partitions=a'
178182

179-
// RUN: not %clang -### -fgpu-rdc --offload-arch=gfx90a -nogpulib -nogpuinc \
180-
// RUN: -L. -foffload-lto --flto-partitions=0 %s 2>&1 | FileCheck -check-prefix=LTO_PARTS_INV1 %s
181-
// LTO_PARTS_INV1: clang: error: invalid integral value '0' in '--flto-partitions=0'
183+
// RUN: not %clang -### -fgpu-rdc --offload-arch=gfx90a -nogpulib -nogpuinc --no-offload-new-driver \
184+
// RUN: -L. -foffload-lto -flto-partitions=0 %s 2>&1 | FileCheck -check-prefix=LTO_PARTS_INV1 %s
185+
// LTO_PARTS_INV1: clang: error: invalid integral value '0' in '-flto-partitions=0'

0 commit comments

Comments
 (0)