Skip to content

Commit 775d5f3

Browse files
authored
(cherry-pick) Fixes and Improvements for -flto-partitions option (llvm#1416)
2 parents d42900d + cf66fd8 commit 775d5f3

File tree

8 files changed

+60
-50
lines changed

8 files changed

+60
-50
lines changed

clang/include/clang/Driver/Options.td

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1458,8 +1458,6 @@ def fhip_emit_relocatable : Flag<["-"], "fhip-emit-relocatable">,
14581458
HelpText<"Compile HIP source to relocatable">;
14591459
def fno_hip_emit_relocatable : Flag<["-"], "fno-hip-emit-relocatable">,
14601460
HelpText<"Do not override toolchain to compile HIP source to relocatable">;
1461-
def flto_partitions_EQ : Joined<["--"], "flto-partitions=">, Group<hip_Group>,
1462-
HelpText<"Number of partitions to use for parallel full LTO codegen. Use 1 to disable partitioning.">;
14631461
}
14641462

14651463
// Clang specific/exclusive options for OpenACC.
@@ -3002,6 +3000,8 @@ defm fat_lto_objects : BoolFOption<"fat-lto-objects",
30023000
PosFlag<SetTrue, [], [ClangOption, CC1Option], "Enable">,
30033001
NegFlag<SetFalse, [], [ClangOption, CC1Option], "Disable">,
30043002
BothFlags<[], [ClangOption, CC1Option], " fat LTO object support">>;
3003+
def flto_partitions_EQ : Joined<["-"], "flto-partitions=">, Group<f_Group>,
3004+
HelpText<"Number of partitions to use for parallel full LTO codegen, ld.lld only.">;
30053005
def fmacro_backtrace_limit_EQ : Joined<["-"], "fmacro-backtrace-limit=">,
30063006
Group<f_Group>, Visibility<[ClangOption, CC1Option, CLOption]>,
30073007
HelpText<"Set the maximum number of entries to print in a macro expansion backtrace (0 = no limit)">,

clang/lib/Driver/ToolChains/AMDGPU.cpp

Lines changed: 12 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -624,19 +624,18 @@ void amdgpu::Linker::ConstructJob(Compilation &C, const JobAction &JA,
624624
CmdArgs.push_back("--no-undefined");
625625
CmdArgs.push_back("-shared");
626626

627-
addLinkerCompressDebugSectionsOption(getToolChain(), Args, CmdArgs);
628-
Args.AddAllArgs(CmdArgs, options::OPT_L);
629-
getToolChain().AddFilePathLibArgs(Args, CmdArgs);
630-
AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA);
631627
if (C.getDriver().isUsingLTO()) {
632628
const bool ThinLTO = (C.getDriver().getLTOMode() == LTOK_Thin);
633629
addLTOOptions(getToolChain(), Args, CmdArgs, Output, Inputs[0], ThinLTO);
634-
635-
if (!ThinLTO)
636-
addFullLTOPartitionOption(C.getDriver(), Args, CmdArgs);
637630
} else if (Args.hasArg(options::OPT_mcpu_EQ))
638631
CmdArgs.push_back(Args.MakeArgString(
639632
"-plugin-opt=mcpu=" + Args.getLastArgValue(options::OPT_mcpu_EQ)));
633+
634+
addLinkerCompressDebugSectionsOption(getToolChain(), Args, CmdArgs);
635+
Args.AddAllArgs(CmdArgs, options::OPT_L);
636+
getToolChain().AddFilePathLibArgs(Args, CmdArgs);
637+
AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA);
638+
640639
CmdArgs.push_back("-o");
641640
CmdArgs.push_back(Output.getFilename());
642641
C.addCommand(std::make_unique<Command>(
@@ -645,14 +644,12 @@ void amdgpu::Linker::ConstructJob(Compilation &C, const JobAction &JA,
645644
}
646645

647646
static unsigned getFullLTOPartitions(const Driver &D, const ArgList &Args) {
648-
const Arg *A = Args.getLastArg(options::OPT_flto_partitions_EQ);
649-
// In the absence of an option, use 8 as the default.
650-
if (!A)
651-
return 8;
652647
int Value = 0;
653-
if (StringRef(A->getValue()).getAsInteger(10, Value) || (Value < 1)) {
648+
StringRef A = Args.getLastArgValue(options::OPT_flto_partitions_EQ, "8");
649+
if (A.getAsInteger(10, Value) || (Value < 1)) {
650+
Arg *Arg = Args.getLastArg(options::OPT_flto_partitions_EQ);
654651
D.Diag(diag::err_drv_invalid_int_value)
655-
<< A->getAsString(Args) << A->getValue();
652+
<< Arg->getAsString(Args) << Arg->getValue();
656653
return 1;
657654
}
658655

@@ -662,13 +659,8 @@ static unsigned getFullLTOPartitions(const Driver &D, const ArgList &Args) {
662659
void amdgpu::addFullLTOPartitionOption(const Driver &D,
663660
const llvm::opt::ArgList &Args,
664661
llvm::opt::ArgStringList &CmdArgs) {
665-
// TODO: Should this be restricted to fgpu-rdc only ? Currently we'll
666-
// also do it for non gpu-rdc LTO
667-
668-
if (unsigned NumParts = getFullLTOPartitions(D, Args); NumParts > 1) {
669-
CmdArgs.push_back(
670-
Args.MakeArgString("--lto-partitions=" + Twine(NumParts)));
671-
}
662+
CmdArgs.push_back(Args.MakeArgString("--lto-partitions=" +
663+
Twine(getFullLTOPartitions(D, Args))));
672664
}
673665

674666
void amdgpu::getAMDGPUTargetFeatures(const Driver &D,

clang/lib/Driver/ToolChains/CommonArgs.cpp

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -863,6 +863,17 @@ void tools::addLTOOptions(const ToolChain &ToolChain, const ArgList &Args,
863863
// files
864864
if (IsFatLTO)
865865
CmdArgs.push_back("--fat-lto-objects");
866+
867+
if (Args.hasArg(options::OPT_flto_partitions_EQ)) {
868+
int Value = 0;
869+
StringRef A = Args.getLastArgValue(options::OPT_flto_partitions_EQ, "8");
870+
if (A.getAsInteger(10, Value) || (Value < 1)) {
871+
Arg *Arg = Args.getLastArg(options::OPT_flto_partitions_EQ);
872+
D.Diag(diag::err_drv_invalid_int_value)
873+
<< Arg->getAsString(Args) << Arg->getValue();
874+
}
875+
CmdArgs.push_back(Args.MakeArgString("--lto-partitions=" + A));
876+
}
866877
}
867878

868879
const char *PluginOptPrefix = IsOSAIX ? "-bplugin_opt:" : "-plugin-opt=";

clang/lib/Driver/ToolChains/HIPAMD.cpp

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -160,8 +160,6 @@ void AMDGCN::Linker::constructLldCommand(Compilation &C, const JobAction &JA,
160160

161161
addLinkerCompressDebugSectionsOption(TC, Args, LldArgs);
162162

163-
amdgpu::addFullLTOPartitionOption(D, Args, LldArgs);
164-
165163
// Given that host and device linking happen in separate processes, the device
166164
// linker doesn't always have the visibility as to which device symbols are
167165
// needed by a program, especially for the device symbol dependencies that are
@@ -355,6 +353,10 @@ HIPAMDToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
355353
checkTargetID(*DAL);
356354
}
357355

356+
if (!Args.hasArg(options::OPT_flto_partitions_EQ))
357+
DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_flto_partitions_EQ),
358+
"8");
359+
358360
return DAL;
359361
}
360362

clang/test/Driver/amdgpu-toolchain.c

Lines changed: 4 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -20,26 +20,13 @@
2020

2121
// RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx906 -nogpulib \
2222
// RUN: -L. -flto -fconvergent-functions %s 2>&1 | FileCheck -check-prefix=LTO %s
23-
// LTO: clang{{.*}} "-flto=full"{{.*}}"-fconvergent-functions"
24-
// LTO: ld.lld{{.*}}"-L."{{.*}}"-plugin-opt=mcpu=gfx906"{{.*}}"--lto-partitions={{[0-9]+}}"
23+
// LTO: clang{{.*}}"-flto=full"{{.*}}"-fconvergent-functions"
24+
// LTO: ld.lld{{.*}}"-plugin-opt=mcpu=gfx906"{{.*}}"{{.*}}
25+
2526
// RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx906 -nogpulib \
2627
// RUN: -L. -fconvergent-functions %s 2>&1 | FileCheck -check-prefix=MCPU %s
27-
// MCPU: ld.lld{{.*}}"-L."{{.*}}"-plugin-opt=mcpu=gfx906"
28+
// MCPU: ld.lld{{.*}}"-plugin-opt=mcpu=gfx906"{{.*}}
2829

2930
// RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx906 -nogpulib \
3031
// RUN: -fuse-ld=ld %s 2>&1 | FileCheck -check-prefixes=LD %s
3132
// LD: ld.lld
32-
33-
// Check --flto-partitions
34-
35-
// RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx90a -nogpulib \
36-
// RUN: -L. -flto --flto-partitions=42 %s 2>&1 | FileCheck -check-prefix=LTO_PARTS %s
37-
// LTO_PARTS: ld.lld{{.*}}"-L."{{.*}}"-plugin-opt=mcpu=gfx90a"{{.*}}"--lto-partitions=42"
38-
39-
// RUN: not %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx90a -nogpulib \
40-
// RUN: -L. -flto --flto-partitions=a %s 2>&1 | FileCheck -check-prefix=LTO_PARTS_INV0 %s
41-
// LTO_PARTS_INV0: clang: error: invalid integral value 'a' in '--flto-partitions=a'
42-
43-
// RUN: not %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx90a -nogpulib \
44-
// RUN: -L. -flto --flto-partitions=0 %s 2>&1 | FileCheck -check-prefix=LTO_PARTS_INV1 %s
45-
// LTO_PARTS_INV1: clang: error: invalid integral value '0' in '--flto-partitions=0'
Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
// RUN: %clang -### --target=x86_64-linux-gnu \
2-
// RUN: -x hip --cuda-gpu-arch=gfx803 --flto-partitions=42 \
2+
// RUN: -x hip --cuda-gpu-arch=gfx803 -flto-partitions=42 \
33
// RUN: --no-offload-new-driver --emit-static-lib -nogpulib \
44
// RUN: -fuse-ld=lld -B%S/Inputs/lld -fgpu-rdc -nogpuinc \
55
// RUN: %S/Inputs/hip_multiple_inputs/a.cu \
@@ -10,26 +10,26 @@
1010
// FIXED-PARTS-NOT: ".*opt"
1111
// FIXED-PARTS-NOT: ".*llc"
1212
// FIXED-PARTS: [[LLD: ".*lld.*"]] {{.*}} "-plugin-opt=-amdgpu-internalize-symbols"
13-
// FIXED-PARTS-SAME: "-plugin-opt=mcpu=gfx803"
1413
// FIXED-PARTS-SAME: "--lto-partitions=42"
14+
// FIXED-PARTS-SAME: "-plugin-opt=mcpu=gfx803"
1515
// FIXED-PARTS-SAME: "-o" "{{.*out}}" "{{.*bc}}"
1616

1717
// RUN: not %clang -### --target=x86_64-linux-gnu \
18-
// RUN: -x hip --cuda-gpu-arch=gfx803 --flto-partitions=a \
18+
// RUN: -x hip --cuda-gpu-arch=gfx803 -flto-partitions=a \
1919
// RUN: --no-offload-new-driver --emit-static-lib -nogpulib \
2020
// RUN: -fuse-ld=lld -B%S/Inputs/lld -fgpu-rdc -nogpuinc \
2121
// RUN: %S/Inputs/hip_multiple_inputs/a.cu \
2222
// RUN: %S/Inputs/hip_multiple_inputs/b.hip \
2323
// RUN: 2>&1 | FileCheck %s --check-prefix=LTO_PARTS_INV0
2424

25-
// LTO_PARTS_INV0: clang: error: invalid integral value 'a' in '--flto-partitions=a'
25+
// LTO_PARTS_INV0: clang: error: invalid integral value 'a' in '-flto-partitions=a'
2626

2727
// RUN: not %clang -### --target=x86_64-linux-gnu \
28-
// RUN: -x hip --cuda-gpu-arch=gfx803 --flto-partitions=0 \
28+
// RUN: -x hip --cuda-gpu-arch=gfx803 -flto-partitions=0 \
2929
// RUN: --no-offload-new-driver --emit-static-lib -nogpulib \
3030
// RUN: -fuse-ld=lld -B%S/Inputs/lld -fgpu-rdc -nogpuinc \
3131
// RUN: %S/Inputs/hip_multiple_inputs/a.cu \
3232
// RUN: %S/Inputs/hip_multiple_inputs/b.hip \
3333
// RUN: 2>&1 | FileCheck %s --check-prefix=LTO_PARTS_INV1
3434

35-
// LTO_PARTS_INV1: clang: error: invalid integral value '0' in '--flto-partitions=0'
35+
// LTO_PARTS_INV1: clang: error: invalid integral value '0' in '-flto-partitions=0'

clang/test/Driver/hip-toolchain-rdc-static-lib.hip

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -48,8 +48,8 @@
4848
// CHECK-NOT: ".*opt"
4949
// CHECK-NOT: ".*llc"
5050
// CHECK: [[LLD: ".*lld.*"]] {{.*}} "-plugin-opt=-amdgpu-internalize-symbols"
51-
// CHECK-SAME: "-plugin-opt=mcpu=gfx803"
5251
// CHECK-SAME: "--lto-partitions={{[0-9]+}}"
52+
// CHECK-SAME: "-plugin-opt=mcpu=gfx803"
5353
// CHECK-SAME: "-o" "[[IMG_DEV1:.*out]]" [[A_BC1]] [[B_BC1]]
5454

5555
// generate image for device side path on gfx900
@@ -77,8 +77,8 @@
7777
// CHECK-NOT: ".*opt"
7878
// CHECK-NOT: ".*llc"
7979
// CHECK: [[LLD]] {{.*}} "-plugin-opt=-amdgpu-internalize-symbols"
80-
// CHECK-SAME: "-plugin-opt=mcpu=gfx900"
8180
// CHECK-SAME: "--lto-partitions={{[0-9]+}}"
81+
// CHECK-SAME: "-plugin-opt=mcpu=gfx900"
8282
// CHECK-SAME: "--whole-archive"
8383
// CHECK-SAME: "-o" "[[IMG_DEV2:.*out]]" [[A_BC2]] [[B_BC2]]
8484
// CHECK-SAME: "--no-whole-archive"

clang/test/Driver/hip-toolchain-rdc.hip

Lines changed: 19 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -146,8 +146,8 @@
146146
// CHECK-NOT: ".*opt"
147147
// CHECK-NOT: ".*llc"
148148
// CHECK: {{".*lld.*"}} {{.*}} "-plugin-opt=-amdgpu-internalize-symbols"
149-
// CHECK-SAME: "-plugin-opt=mcpu=gfx900"
150149
// CHECK-SAME: "--lto-partitions={{[0-9]+}}"
150+
// CHECK-SAME: "-plugin-opt=mcpu=gfx900"
151151
// CHECK-SAME: "-o" "[[IMG_DEV2:.*.out]]" [[A_BC2]] [[B_BC2]]
152152

153153
// combine images generated into hip fat binary object
@@ -161,3 +161,21 @@
161161
// output the executable
162162
// LNX: [[LD:".*ld.*"]] {{.*}}"-o" "a.out" {{.*}} [[A_OBJ_HOST]] [[B_OBJ_HOST]] [[OBJBUNDLE]]
163163
// MSVC: [[LD:".*lld-link.*"]] {{.*}}"-out:a.exe" {{.*}} [[A_OBJ_HOST]] [[B_OBJ_HOST]] [[OBJBUNDLE]]
164+
165+
// Check -flto-partitions
166+
167+
// RUN: %clang -### -fgpu-rdc --offload-arch=gfx90a -nogpulib -nogpuinc --no-offload-new-driver \
168+
// RUN: -L. -foffload-lto %s 2>&1 | FileCheck -check-prefix=LTO_DEFAULT %s
169+
// LTO_DEFAULT: lld{{.*}}"--lto-partitions=8"
170+
171+
// RUN: %clang -### -fgpu-rdc --offload-arch=gfx90a -nogpulib -nogpuinc --no-offload-new-driver \
172+
// RUN: -L. -foffload-lto -flto-partitions=42 %s 2>&1 | FileCheck -check-prefix=LTO_PARTS %s
173+
// LTO_PARTS: lld{{.*}}"--lto-partitions=42"
174+
175+
// RUN: not %clang -### -fgpu-rdc --offload-arch=gfx90a -nogpulib -nogpuinc --no-offload-new-driver \
176+
// RUN: -L. -foffload-lto -flto-partitions=a %s 2>&1 | FileCheck -check-prefix=LTO_PARTS_INV0 %s
177+
// LTO_PARTS_INV0: clang: error: invalid integral value 'a' in '-flto-partitions=a'
178+
179+
// RUN: not %clang -### -fgpu-rdc --offload-arch=gfx90a -nogpulib -nogpuinc --no-offload-new-driver \
180+
// RUN: -L. -foffload-lto -flto-partitions=0 %s 2>&1 | FileCheck -check-prefix=LTO_PARTS_INV1 %s
181+
// LTO_PARTS_INV1: clang: error: invalid integral value '0' in '-flto-partitions=0'

0 commit comments

Comments
 (0)