Skip to content

Commit b3d463a

Browse files
jhuber6 authored and Pierre-vh committed
(cherry-pick) [Clang] Make --lto-partitions only default for HIP (llvm#133164)
Summary: The default behavior for LTO on other targets does not specify the number of LTO partitions. Recent changes made this default to 8 on AMDGPU, which caused some issues with the `libc` project. The option to disable this is HIP-only, so I think for now we should restrict this to just HIP. I'm definitely on board with getting some more parallelism here, but I think it should probably be restricted to just offloading languages. The new driver goes through `--target=amdgcn-amd-amdhsa` for its output, which means we'd need to forward the default somehow.
1 parent dbadd95 commit b3d463a

File tree

3 files changed

+27
-29
lines changed

3 files changed

+27
-29
lines changed

clang/lib/Driver/ToolChains/AMDGPU.cpp

Lines changed: 7 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -632,7 +632,7 @@ void amdgpu::Linker::ConstructJob(Compilation &C, const JobAction &JA,
632632
const bool ThinLTO = (C.getDriver().getLTOMode() == LTOK_Thin);
633633
addLTOOptions(getToolChain(), Args, CmdArgs, Output, Inputs[0], ThinLTO);
634634

635-
if (!ThinLTO)
635+
if (!ThinLTO && JA.getOffloadingDeviceKind() == Action::OFK_HIP)
636636
addFullLTOPartitionOption(C.getDriver(), Args, CmdArgs);
637637
} else if (Args.hasArg(options::OPT_mcpu_EQ))
638638
CmdArgs.push_back(Args.MakeArgString(
@@ -645,14 +645,12 @@ void amdgpu::Linker::ConstructJob(Compilation &C, const JobAction &JA,
645645
}
646646

647647
static unsigned getFullLTOPartitions(const Driver &D, const ArgList &Args) {
648-
const Arg *A = Args.getLastArg(options::OPT_flto_partitions_EQ);
649-
// In the absence of an option, use 8 as the default.
650-
if (!A)
651-
return 8;
652648
int Value = 0;
653-
if (StringRef(A->getValue()).getAsInteger(10, Value) || (Value < 1)) {
649+
StringRef A = Args.getLastArgValue(options::OPT_flto_partitions_EQ, "8");
650+
if (A.getAsInteger(10, Value) || (Value < 1)) {
651+
Arg *Arg = Args.getLastArg(options::OPT_flto_partitions_EQ);
654652
D.Diag(diag::err_drv_invalid_int_value)
655-
<< A->getAsString(Args) << A->getValue();
653+
<< Arg->getAsString(Args) << Arg->getValue();
656654
return 1;
657655
}
658656

@@ -662,13 +660,8 @@ static unsigned getFullLTOPartitions(const Driver &D, const ArgList &Args) {
662660
void amdgpu::addFullLTOPartitionOption(const Driver &D,
663661
const llvm::opt::ArgList &Args,
664662
llvm::opt::ArgStringList &CmdArgs) {
665-
// TODO: Should this be restricted to fgpu-rdc only ? Currently we'll
666-
// also do it for non gpu-rdc LTO
667-
668-
if (unsigned NumParts = getFullLTOPartitions(D, Args); NumParts > 1) {
669-
CmdArgs.push_back(
670-
Args.MakeArgString("--lto-partitions=" + Twine(NumParts)));
671-
}
663+
CmdArgs.push_back(Args.MakeArgString("--lto-partitions=" +
664+
Twine(getFullLTOPartitions(D, Args))));
672665
}
673666

674667
void amdgpu::getAMDGPUTargetFeatures(const Driver &D,

clang/test/Driver/amdgpu-toolchain.c

Lines changed: 2 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -21,25 +21,12 @@
2121
// RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx906 -nogpulib \
2222
// RUN: -L. -flto -fconvergent-functions %s 2>&1 | FileCheck -check-prefix=LTO %s
2323
// LTO: clang{{.*}} "-flto=full"{{.*}}"-fconvergent-functions"
24-
// LTO: ld.lld{{.*}}"-L."{{.*}}"-plugin-opt=mcpu=gfx906"{{.*}}"--lto-partitions={{[0-9]+}}"
24+
// LTO: ld.lld{{.*}}"-L."{{.*}}"-plugin-opt=mcpu=gfx906"{{.*}}"
25+
2526
// RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx906 -nogpulib \
2627
// RUN: -L. -fconvergent-functions %s 2>&1 | FileCheck -check-prefix=MCPU %s
2728
// MCPU: ld.lld{{.*}}"-L."{{.*}}"-plugin-opt=mcpu=gfx906"
2829

2930
// RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx906 -nogpulib \
3031
// RUN: -fuse-ld=ld %s 2>&1 | FileCheck -check-prefixes=LD %s
3132
// LD: ld.lld
32-
33-
// Check --flto-partitions
34-
35-
// RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx90a -nogpulib \
36-
// RUN: -L. -flto --flto-partitions=42 %s 2>&1 | FileCheck -check-prefix=LTO_PARTS %s
37-
// LTO_PARTS: ld.lld{{.*}}"-L."{{.*}}"-plugin-opt=mcpu=gfx90a"{{.*}}"--lto-partitions=42"
38-
39-
// RUN: not %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx90a -nogpulib \
40-
// RUN: -L. -flto --flto-partitions=a %s 2>&1 | FileCheck -check-prefix=LTO_PARTS_INV0 %s
41-
// LTO_PARTS_INV0: clang: error: invalid integral value 'a' in '--flto-partitions=a'
42-
43-
// RUN: not %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx90a -nogpulib \
44-
// RUN: -L. -flto --flto-partitions=0 %s 2>&1 | FileCheck -check-prefix=LTO_PARTS_INV1 %s
45-
// LTO_PARTS_INV1: clang: error: invalid integral value '0' in '--flto-partitions=0'

clang/test/Driver/hip-toolchain-rdc.hip

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,3 +161,21 @@
161161
// output the executable
162162
// LNX: [[LD:".*ld.*"]] {{.*}}"-o" "a.out" {{.*}} [[A_OBJ_HOST]] [[B_OBJ_HOST]] [[OBJBUNDLE]]
163163
// MSVC: [[LD:".*lld-link.*"]] {{.*}}"-out:a.exe" {{.*}} [[A_OBJ_HOST]] [[B_OBJ_HOST]] [[OBJBUNDLE]]
164+
165+
// Check --flto-partitions
166+
167+
// RUN: %clang -### -fgpu-rdc --offload-arch=gfx90a -nogpulib -nogpuinc \
168+
// RUN: -L. -foffload-lto %s 2>&1 | FileCheck -check-prefix=LTO_DEFAULT %s
169+
// LTO_DEFAULT: lld{{.*}}"--lto-partitions=8"
170+
171+
// RUN: %clang -### -fgpu-rdc --offload-arch=gfx90a -nogpulib -nogpuinc \
172+
// RUN: -L. -foffload-lto --flto-partitions=42 %s 2>&1 | FileCheck -check-prefix=LTO_PARTS %s
173+
// LTO_PARTS: lld{{.*}}"--lto-partitions=42"
174+
175+
// RUN: not %clang -### -fgpu-rdc --offload-arch=gfx90a -nogpulib -nogpuinc \
176+
// RUN: -L. -foffload-lto --flto-partitions=a %s 2>&1 | FileCheck -check-prefix=LTO_PARTS_INV0 %s
177+
// LTO_PARTS_INV0: clang: error: invalid integral value 'a' in '--flto-partitions=a'
178+
179+
// RUN: not %clang -### -fgpu-rdc --offload-arch=gfx90a -nogpulib -nogpuinc \
180+
// RUN: -L. -foffload-lto --flto-partitions=0 %s 2>&1 | FileCheck -check-prefix=LTO_PARTS_INV1 %s
181+
// LTO_PARTS_INV1: clang: error: invalid integral value '0' in '--flto-partitions=0'

0 commit comments

Comments
 (0)