Skip to content

Commit cbc4bbb

Browse files
committed
[HIP] Ignore host linker flags for device-only
When compiling in device-only mode (e.g. --offload-device-only), the host link phase does not run, so the driver should ignore all host linker flags. Differential Revision: https://reviews.llvm.org/D154881 Change-Id: I8244acef5c33108cf15b1dbb188f974f30099718
1 parent 7f08f44 commit cbc4bbb

File tree

2 files changed

+45
-2
lines changed

2 files changed

+45
-2
lines changed

clang/lib/Driver/Driver.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4146,9 +4146,11 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args,
41464146
// Queue linker inputs.
41474147
if (Phase == phases::Link) {
41484148
assert(Phase == PL.back() && "linking must be final compilation step.");
4149-
// We don't need to generate additional link commands if emitting AMD bitcode
4149+
// We don't need to generate additional link commands if emitting AMD
4150+
// bitcode or compiling only for the offload device
41504151
if (!(C.getInputArgs().hasArg(options::OPT_hip_link) &&
4151-
(C.getInputArgs().hasArg(options::OPT_emit_llvm))))
4152+
(C.getInputArgs().hasArg(options::OPT_emit_llvm))) &&
4153+
!offloadDeviceOnly())
41524154
LinkerInputs.push_back(Current);
41534155
Current = nullptr;
41544156
break;

clang/test/Driver/hip-phases.hip

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,14 @@
219219
// RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
220220
// RUN: --cuda-gpu-arch=gfx803 %s --cuda-device-only 2>&1 \
221221
// RUN: | FileCheck -check-prefixes=DBIN %s
222+
//
223+
// Test single gpu architecture with complete compilation in device-only
224+
// compilation mode with an unused host linker flag.
225+
//
226+
// RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
227+
// RUN: --cuda-gpu-arch=gfx803 %s --cuda-device-only -Wl,--disable-new-dtags 2>&1 \
228+
// RUN: | FileCheck -check-prefixes=DBIN %s
229+
222230
// DBIN-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH:gfx803]])
223231
// DBIN-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
224232
// DBIN-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]])
@@ -229,6 +237,7 @@
229237
// DBIN-DAG: [[P7:[0-9]+]]: linker, {[[P6]]}, hip-fatbin, (device-hip, )
230238
// DBIN-DAG: [[P8:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:)" {[[P7]]}, hip-fatbin
231239
// DBIN-NOT: host
240+
232241
//
233242
// Test single gpu architecture up to the assemble phase in device-only
234243
// compilation mode.
@@ -251,13 +260,19 @@
251260
// RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
252261
// RUN: --cuda-gpu-arch=gfx803 %s --cuda-device-only -fhip-emit-relocatable 2>&1 \
253262
// RUN: | FileCheck -check-prefixes=RELOC %s
263+
//
264+
// RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
265+
// RUN: --cuda-gpu-arch=gfx803 %s --cuda-device-only -fhip-emit-relocatable -Wl,--disable-new-dtags \
266+
// RUN: 2>&1 | FileCheck -check-prefixes=RELOC %s
267+
//
254268
// RELOC-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH:gfx803]])
255269
// RELOC-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
256270
// RELOC-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]])
257271
// RELOC-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]])
258272
// RELOC-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (device-[[T]], [[ARCH]])
259273
// RELOC-NOT: linker
260274
// RELOC-DAG: [[P5:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P4]]}, object
275+
// RELOC-NOT: host
261276

262277
//
263278
// Test two gpu architectures with compile to relocatable in device-only
@@ -266,6 +281,11 @@
266281
// RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
267282
// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s --cuda-device-only -fhip-emit-relocatable 2>&1 \
268283
// RUN: | FileCheck -check-prefixes=RELOC2 %s
284+
//
285+
// RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
286+
// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s --cuda-device-only -fhip-emit-relocatable \
287+
// RUN: -Wl,--disable-new-dtags 2>&1 | FileCheck -check-prefixes=RELOC2 %s
288+
//
269289
// RELOC2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH:gfx803]])
270290
// RELOC2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
271291
// RELOC2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]])
@@ -280,6 +300,7 @@
280300
// RELOC2-DAG: [[P10:[0-9]+]]: assembler, {[[P9]]}, object, (device-[[T]], [[ARCH2]])
281301
// RELOC2-NOT: linker
282302
// RELOC2-DAG: [[P11:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH2]])" {[[P10]]}, object
303+
// RELOC2-NOT: host
283304

284305
//
285306
// Test two gpu architectures with complete compilation in device-only
@@ -288,6 +309,14 @@
288309
// RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
289310
// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s --cuda-device-only \
290311
// RUN: 2>&1 | FileCheck -check-prefixes=DBIN2 %s
312+
//
313+
// Test two gpu architectures with complete compilation in device-only
314+
// compilation mode with an unused host linker flag.
315+
//
316+
// RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
317+
// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s --cuda-device-only \
318+
// RUN: -Wl,--disable-new-dtags 2>&1 | FileCheck -check-prefixes=DBIN2 %s
319+
291320
// DBIN2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH:gfx803]])
292321
// DBIN2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
293322
// DBIN2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]])
@@ -305,6 +334,7 @@
305334
// DBIN2-DAG: [[P14:[0-9]+]]: linker, {[[P6]], [[P13]]}, hip-fatbin, (device-hip, )
306335
// DBIN2-DAG: [[P15:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:)" {[[P14]]}, hip-fatbin
307336
// DBIN2-NOT: host
337+
308338
//
309339
// Test two gpu architectures up to the assemble phase in device-only
310340
// compilation mode.
@@ -357,11 +387,21 @@
357387
// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %t/obj1.o %t/obj2.o \
358388
// RUN: -fgpu-rdc --cuda-device-only 2>&1 | FileCheck -check-prefixes=L2,RL2,RL2-DEV %s
359389

390+
// RUN: %clang --target=x86_64-unknown-linux-gnu -ccc-print-phases --hip-link \
391+
// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %t/obj1.o %t/obj2.o \
392+
// RUN: -fgpu-rdc --cuda-device-only -Wl,--disable-new-dtags 2>&1 \
393+
// RUN: | FileCheck -check-prefixes=L2,RL2,RL2-DEV %s
394+
360395
// RUN: %clang --target=x86_64-unknown-linux-gnu -ccc-print-phases --hip-link \
361396
// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %t/obj1.o %t/obj2.o \
362397
// RUN: -fgpu-rdc --cuda-device-only --no-gpu-bundle-output 2>&1 \
363398
// RUN: | FileCheck -check-prefixes=L2,RL2,RL2-NB %s
364399

400+
// RUN: %clang --target=x86_64-unknown-linux-gnu -ccc-print-phases --hip-link \
401+
// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %t/obj1.o %t/obj2.o \
402+
// RUN: -fgpu-rdc --cuda-device-only --no-gpu-bundle-output -Wl,--disable-new-dtags 2>&1 \
403+
// RUN: | FileCheck -check-prefixes=L2,RL2,RL2-NB %s
404+
365405
// L2-DAG: [[P0:[0-9]+]]: input, "{{.*}}obj1.o", object
366406
// RL2-DAG: [[P1:[0-9]+]]: clang-offload-unbundler, {[[P0]]}, object
367407
// L2-DAG: [[P2:[0-9]+]]: input, "{{.*}}obj2.o", object
@@ -381,6 +421,7 @@
381421
// NL2-DAG: [[P4:[0-9]+]]: linker, {[[P0]], [[P2]]}, image
382422
// RL2-EM-DAG: [[P4:[0-9]+]]: linker, {[[P1]], [[P3]], [[P9]]}, image, (host-[[T]])
383423
// RL2-DEV-NOT: linker
424+
// RL2-NB-NOT: host
384425

385426
// Test one gpu architecture up to the preprocessor expansion output phase in device-only
386427
// compilation mode. no bundle.

0 commit comments

Comments
 (0)