Skip to content

Commit ac135f9

Browse files
committed
[Clang] Fix using LTO with the new driver in RDC-mode
The new driver supports LTO for RDC-mode compilations. However, LTO was not handled correctly for non-RDC compilations. HIP can handle this case because its device code is fed to `lld`, which performs the LTO itself. CUDA, however, would require extra work that is wholly useless in non-RDC mode, so it should report an error instead. Reviewed By: yaxunl Differential Revision: https://reviews.llvm.org/D135305
1 parent f47d5dc commit ac135f9

File tree

6 files changed

+77
-2
lines changed

6 files changed

+77
-2
lines changed

clang/lib/Driver/Driver.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4375,7 +4375,9 @@ Action *Driver::BuildOffloadingActions(Compilation &C,
43754375

43764376
// Compiling HIP in non-RDC mode requires linking each action individually.
43774377
for (Action *&A : DeviceActions) {
4378-
if (A->getType() != types::TY_Object || Kind != Action::OFK_HIP ||
4378+
if ((A->getType() != types::TY_Object &&
4379+
A->getType() != types::TY_LTO_BC) ||
4380+
Kind != Action::OFK_HIP ||
43794381
Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false))
43804382
continue;
43814383
ActionList LinkerInput = {A};

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4842,7 +4842,6 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
48424842
CmdArgs.push_back("-emit-llvm-uselists");
48434843

48444844
if (IsUsingLTO) {
4845-
// Only AMDGPU supports device-side LTO.
48464845
if (IsDeviceOffloadAction && !JA.isDeviceOffloading(Action::OFK_OpenMP) &&
48474846
!Args.hasFlag(options::OPT_offload_new_driver,
48484847
options::OPT_no_offload_new_driver, false) &&
@@ -4852,6 +4851,12 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
48524851
options::OPT_foffload_lto_EQ)
48534852
->getAsString(Args)
48544853
<< Triple.getTriple();
4854+
} else if (Triple.isNVPTX() && !IsRDCMode) {
4855+
D.Diag(diag::err_drv_unsupported_opt_for_language_mode)
4856+
<< Args.getLastArg(options::OPT_foffload_lto,
4857+
options::OPT_foffload_lto_EQ)
4858+
->getAsString(Args)
4859+
<< "-fno-gpu-rdc";
48554860
} else {
48564861
assert(LTOMode == LTOK_Full || LTOMode == LTOK_Thin);
48574862
CmdArgs.push_back(Args.MakeArgString(

clang/test/Driver/cuda-bindings.cu

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -234,3 +234,11 @@
234234
// SAVE-TEMPS: "-cc1" "-triple" "nvptx64-nvidia-cuda"{{.*}}"-target-cpu" "sm_52"
235235
// SAVE-TEMPS: "-cc1" "-triple" "nvptx64-nvidia-cuda"{{.*}}"-target-cpu" "sm_70"
236236
// SAVE-TEMPS: "-cc1" "-triple" "powerpc64le-ibm-linux-gnu"
237+
238+
//
239+
// Check to ensure that we cannot use '-foffload-lto' when not operating in RDC-mode.
240+
//
241+
// RUN: %clang -### -target powerpc64le-ibm-linux-gnu -fno-gpu-rdc --offload-new-driver \
242+
// RUN: -foffload-lto --offload-arch=sm_70 --offload-arch=sm_52 -c %s 2>&1 \
243+
// RUN: | FileCheck -check-prefix=LTO-NO-RDC %s
244+
// LTO-NO-RDC: error: unsupported option '-foffload-lto' for language mode '-fno-gpu-rdc'

clang/test/Driver/cuda-phases.cu

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -294,3 +294,27 @@
294294
// NON-CUDA-INPUT-NEXT: 22: backend, {21}, assembler, (host-cuda)
295295
// NON-CUDA-INPUT-NEXT: 23: assembler, {22}, object, (host-cuda)
296296
// NON-CUDA-INPUT-NEXT: 24: clang-linker-wrapper, {18, 23}, image, (host-cuda)
297+
298+
//
299+
// Test the phases using the new driver in LTO-mode.
300+
//
301+
// RUN: %clang -### -target powerpc64le-ibm-linux-gnu --offload-new-driver -ccc-print-phases \
302+
// RUN: --offload-arch=sm_70 --offload-arch=sm_52 -foffload-lto -fgpu-rdc -c %s 2>&1 \
303+
// RUN: | FileCheck -check-prefix=LTO %s
304+
// LTO: 0: input, "[[INPUT:.+]]", cuda, (host-cuda)
305+
// LTO-NEXT: 1: preprocessor, {0}, cuda-cpp-output, (host-cuda)
306+
// LTO-NEXT: 2: compiler, {1}, ir, (host-cuda)
307+
// LTO-NEXT: 3: input, "[[INPUT]]", cuda, (device-cuda, sm_52)
308+
// LTO-NEXT: 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_52)
309+
// LTO-NEXT: 5: compiler, {4}, ir, (device-cuda, sm_52)
310+
// LTO-NEXT: 6: backend, {5}, lto-bc, (device-cuda, sm_52)
311+
// LTO-NEXT: 7: offload, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {6}, lto-bc
312+
// LTO-NEXT: 8: input, "[[INPUT]]", cuda, (device-cuda, sm_70)
313+
// LTO-NEXT: 9: preprocessor, {8}, cuda-cpp-output, (device-cuda, sm_70)
314+
// LTO-NEXT: 10: compiler, {9}, ir, (device-cuda, sm_70)
315+
// LTO-NEXT: 11: backend, {10}, lto-bc, (device-cuda, sm_70)
316+
// LTO-NEXT: 12: offload, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {11}, lto-bc
317+
// LTO-NEXT: 13: clang-offload-packager, {7, 12}, image, (device-cuda)
318+
// LTO-NEXT: 14: offload, "host-cuda (powerpc64le-ibm-linux-gnu)" {2}, "device-cuda (powerpc64le-ibm-linux-gnu)" {13}, ir
319+
// LTO-NEXT: 15: backend, {14}, assembler, (host-cuda)
320+
// LTO-NEXT: 16: assembler, {15}, object, (host-cuda)

clang/test/Driver/hip-binding.hip

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,3 +79,15 @@
7979
// SYNTAX-ONLY: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}}"-fsyntax-only"
8080
// SYNTAX-ONLY: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}}"-fsyntax-only"
8181
// SYNTAX-ONLY: "-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-fsyntax-only"
82+
83+
//
84+
// Check to ensure that we can use '-foffload-lto' when not operating in RDC-mode.
85+
//
86+
// RUN: %clang -### --target=x86_64-linux-gnu -fno-gpu-rdc --offload-new-driver -ccc-print-bindings \
87+
// RUN: -foffload-lto --offload-arch=gfx90a --offload-arch=gfx908 -c %s 2>&1 \
88+
// RUN: | FileCheck -check-prefix=LTO-NO-RDC %s
89+
// LTO-NO-RDC: # "amdgcn-amd-amdhsa" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[LTO_908:.+]]"
90+
// LTO-NO-RDC-NEXT: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[LTO_908]]"], output: "[[OBJ_908:.+]]"
91+
// LTO-NO-RDC-NEXT: # "amdgcn-amd-amdhsa" - "clang", inputs: ["[[INPUT]]"], output: "[[LTO_90A:.+]]"
92+
// LTO-NO-RDC-NEXT: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[LTO_90A]]"], output: "[[OBJ_90A:.+]]"
93+
// LTO-NO-RDC-NEXT: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[OBJ_908]]", "[[OBJ_90A]]"], output: "[[HIPFB:.+]]"

clang/test/Driver/hip-phases.hip

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -546,3 +546,27 @@
546546

547547
// CHECK: [[L0:[0-9]+]]: linker, {[[A3]], [[B3]]}, ir, (device-hip, [[ARCH]])
548548
// CHECK: offload, "device-hip (amdgcn-amd-amdhsa:[[ARCH]])" {[[L0]]}, ir
549+
550+
//
551+
// Test the phases using the new driver in LTO-mode.
552+
//
553+
// RUN: %clang -### --target=x86_64-linux-gnu --offload-new-driver -ccc-print-phases \
554+
// RUN: --offload-arch=gfx90a --offload-arch=gfx908 -foffload-lto -fgpu-rdc -c %s 2>&1 \
555+
// RUN: | FileCheck -check-prefix=LTO %s
556+
// LTO: 0: input, "[[INPUT:.+]]", hip, (host-hip)
557+
// LTO-NEXT: 1: preprocessor, {0}, hip-cpp-output, (host-hip)
558+
// LTO-NEXT: 2: compiler, {1}, ir, (host-hip)
559+
// LTO-NEXT: 3: input, "[[INPUT]]", hip, (device-hip, gfx908)
560+
// LTO-NEXT: 4: preprocessor, {3}, hip-cpp-output, (device-hip, gfx908)
561+
// LTO-NEXT: 5: compiler, {4}, ir, (device-hip, gfx908)
562+
// LTO-NEXT: 6: backend, {5}, lto-bc, (device-hip, gfx908)
563+
// LTO-NEXT: 7: offload, "device-hip (amdgcn-amd-amdhsa:gfx908)" {6}, lto-bc
564+
// LTO-NEXT: 8: input, "[[INPUT]]", hip, (device-hip, gfx90a)
565+
// LTO-NEXT: 9: preprocessor, {8}, hip-cpp-output, (device-hip, gfx90a)
566+
// LTO-NEXT: 10: compiler, {9}, ir, (device-hip, gfx90a)
567+
// LTO-NEXT: 11: backend, {10}, lto-bc, (device-hip, gfx90a)
568+
// LTO-NEXT: 12: offload, "device-hip (amdgcn-amd-amdhsa:gfx90a)" {11}, lto-bc
569+
// LTO-NEXT: 13: clang-offload-packager, {7, 12}, image, (device-hip)
570+
// LTO-NEXT: 14: offload, "host-hip (x86_64-unknown-linux-gnu)" {2}, "device-hip (x86_64-unknown-linux-gnu)" {13}, ir
571+
// LTO-NEXT: 15: backend, {14}, assembler, (host-hip)
572+
// LTO-NEXT: 16: assembler, {15}, object, (host-hip)

0 commit comments

Comments
 (0)