Skip to content

Commit e5eb365

Browse files
committed
[CUDA][HIP] Fix offloading kind for linking C++ programs
When CUDA or HIP programs and C++ programs are both passed to the clang driver without -c, the C++ programs are treated as CUDA or HIP programs, which is incorrect. This is because the action builder sets the offloading kind of every input job action of the linking action to the union of the offloading kinds of those input job actions, i.e. if there is one HIP or CUDA input to the linker, then all the inputs to the linker are marked as HIP or CUDA. To fix this issue, the offload action builder tracks the originating input argument of each host action, which allows it to determine the active offload kind of each host action. The offload kind of each input action to the linker can then be determined individually. Reviewed by: Artem Belevich. Differential Revision: https://reviews.llvm.org/D120911
1 parent 9bd72b5 commit e5eb365

File tree

3 files changed

+97
-5
lines changed

3 files changed

+97
-5
lines changed

clang/include/clang/Driver/Action.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,11 @@ class Action {
189189
/// dependences.
190190
void propagateHostOffloadInfo(unsigned OKinds, const char *OArch);
191191

192+
/// Set the host offload info of this action only: OR \p OKinds into the
/// active offload kind mask and replace the offloading arch with \p OArch.
/// Nothing in this body visits the action's inputs.
void setHostOffloadInfo(unsigned OKinds, const char *OArch) {
193+
ActiveOffloadKindMask |= OKinds;
194+
OffloadingArch = OArch;
195+
}
196+
192197
/// Set the offload info of this action to be the same as the provided action,
193198
/// and propagate it to its dependences.
194199
void propagateOffloadInfo(const Action *A);

clang/lib/Driver/Driver.cpp

Lines changed: 43 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2465,6 +2465,9 @@ class OffloadingActionBuilder final {
24652465
/// Map between an input argument and the offload kinds used to process it.
24662466
std::map<const Arg *, unsigned> InputArgToOffloadKindMap;
24672467

2468+
/// Map between a host action and its originating input argument.
2469+
std::map<Action *, const Arg *> HostActionToInputArgMap;
2470+
24682471
/// Builder interface. It doesn't build anything or keep any state.
24692472
class DeviceActionBuilder {
24702473
public:
@@ -3449,6 +3452,17 @@ class OffloadingActionBuilder final {
34493452
delete SB;
34503453
}
34513454

3455+
/// Record a host action and its originating input argument.
3456+
void recordHostAction(Action *HostAction, const Arg *InputArg) {
3457+
assert(HostAction && "Invalid host action");
3458+
assert(InputArg && "Invalid input argument");
3459+
auto Loc = HostActionToInputArgMap.find(HostAction);
3460+
if (Loc == HostActionToInputArgMap.end())
3461+
HostActionToInputArgMap[HostAction] = InputArg;
3462+
assert(HostActionToInputArgMap[HostAction] == InputArg &&
3463+
"host action mapped to multiple input arguments");
3464+
}
3465+
34523466
/// Generate an action that adds device dependences (if any) to a host action.
34533467
/// If no device dependence actions exist, just return the host action \a
34543468
/// HostAction. If an error is found or if no builder requires the host action
@@ -3464,6 +3478,7 @@ class OffloadingActionBuilder final {
34643478
return HostAction;
34653479

34663480
assert(HostAction && "Invalid host action!");
3481+
recordHostAction(HostAction, InputArg);
34673482

34683483
OffloadAction::DeviceDependences DDeps;
34693484
// Check if all the programming models agree we should not emit the host
@@ -3517,6 +3532,8 @@ class OffloadingActionBuilder final {
35173532
if (!IsValid)
35183533
return true;
35193534

3535+
recordHostAction(HostAction, InputArg);
3536+
35203537
// If we are supporting bundling/unbundling and the current action is an
35213538
// input action of non-source file, we replace the host action by the
35223539
// unbundling action. The bundler tool has the logic to detect if an input
@@ -3533,6 +3550,7 @@ class OffloadingActionBuilder final {
35333550
C.getSingleOffloadToolChain<Action::OFK_Host>(),
35343551
/*BoundArch=*/StringRef(), Action::OFK_Host);
35353552
HostAction = UnbundlingHostAction;
3553+
recordHostAction(HostAction, InputArg);
35363554
}
35373555

35383556
assert(HostAction && "Invalid host action!");
@@ -3569,6 +3587,9 @@ class OffloadingActionBuilder final {
35693587
/// programming models allow it.
35703588
bool appendTopLevelActions(ActionList &AL, Action *HostAction,
35713589
const Arg *InputArg) {
3590+
if (HostAction)
3591+
recordHostAction(HostAction, InputArg);
3592+
35723593
// Get the device actions to be appended.
35733594
ActionList OffloadAL;
35743595
for (auto *SB : SpecializedBuilders) {
@@ -3590,6 +3611,7 @@ class OffloadingActionBuilder final {
35903611
// before this method was called.
35913612
assert(HostAction == AL.back() && "Host action not in the list??");
35923613
HostAction = C.MakeAction<OffloadBundlingJobAction>(OffloadAL);
3614+
recordHostAction(HostAction, InputArg);
35933615
AL.back() = HostAction;
35943616
} else
35953617
AL.append(OffloadAL.begin(), OffloadAL.end());
@@ -3623,6 +3645,11 @@ class OffloadingActionBuilder final {
36233645
if (!SB->isValid())
36243646
continue;
36253647
HA = SB->appendLinkHostActions(DeviceAL);
3648+
// The host action created here has no originating input argument, so its
3649+
// offloading kind needs to be set directly.
3650+
if (HA)
3651+
HA->propagateHostOffloadInfo(SB->getAssociatedOffloadKind(),
3652+
/*BoundArch=*/nullptr);
36263653
}
36273654
return HA;
36283655
}
@@ -3649,10 +3676,22 @@ class OffloadingActionBuilder final {
36493676
// If we don't have device dependencies, we don't have to create an offload
36503677
// action.
36513678
if (DDeps.getActions().empty()) {
3652-
// Propagate all the active kinds to host action. Given that it is a link
3653-
// action it is assumed to depend on all actions generated so far.
3654-
HostAction->propagateHostOffloadInfo(ActiveOffloadKinds,
3655-
/*BoundArch=*/nullptr);
3679+
// Set all the active offloading kinds to the link action. Given that it
3680+
// is a link action it is assumed to depend on all actions generated so
3681+
// far.
3682+
HostAction->setHostOffloadInfo(ActiveOffloadKinds,
3683+
/*BoundArch=*/nullptr);
3684+
// Propagate active offloading kinds for each input to the link action.
3685+
// Each input may have different active offloading kind.
3686+
for (auto A : HostAction->inputs()) {
3687+
auto ArgLoc = HostActionToInputArgMap.find(A);
3688+
if (ArgLoc == HostActionToInputArgMap.end())
3689+
continue;
3690+
auto OFKLoc = InputArgToOffloadKindMap.find(ArgLoc->second);
3691+
if (OFKLoc == InputArgToOffloadKindMap.end())
3692+
continue;
3693+
A->propagateHostOffloadInfo(OFKLoc->second, /*BoundArch=*/nullptr);
3694+
}
36563695
return HostAction;
36573696
}
36583697

clang/test/Driver/hip-phases.hip

Lines changed: 49 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -459,17 +459,65 @@
459459
// Test mixed HIP and C++ compilation. HIP program should have HIP offload kind.
460460
// C++ program should have no offload kind.
461461

462+
// Test compile empty.hip and empty.cpp.
462463
// RUN: %clang -target x86_64-unknown-linux-gnu \
463464
// RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
464465
// RUN: -c %S/Inputs/empty.hip %S/Inputs/empty.cpp 2>&1 | FileCheck -check-prefixes=MIXED %s
465-
466466
// RUN: %clang -target x86_64-unknown-linux-gnu \
467467
// RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
468468
// RUN: -c %S/Inputs/empty.hip %S/Inputs/empty.cpp 2>&1 | FileCheck -check-prefixes=MIXED-NEG %s
469469

470+
// Test compile and link empty.hip and empty.cpp.
471+
// RUN: %clang -target x86_64-unknown-linux-gnu \
472+
// RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
473+
// RUN: %S/Inputs/empty.hip %S/Inputs/empty.cpp 2>&1 | FileCheck -check-prefixes=MIXED %s
474+
// RUN: %clang -target x86_64-unknown-linux-gnu \
475+
// RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
476+
// RUN: %S/Inputs/empty.hip %S/Inputs/empty.cpp 2>&1 | FileCheck -check-prefixes=MIXED-NEG %s
477+
478+
// Test compile and link empty.hip and empty.cpp with --hip-link -fgpu-rdc.
479+
// RUN: %clang -target x86_64-unknown-linux-gnu --hip-link -fgpu-rdc \
480+
// RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
481+
// RUN: %S/Inputs/empty.hip %S/Inputs/empty.cpp 2>&1 | FileCheck -check-prefixes=MIXED %s
482+
// RUN: %clang -target x86_64-unknown-linux-gnu --hip-link -fgpu-rdc \
483+
// RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
484+
// RUN: %S/Inputs/empty.hip %S/Inputs/empty.cpp 2>&1 | FileCheck -check-prefixes=MIXED-NEG %s
485+
486+
// Test compile and link -x hip empty.hip and -x c++ empty.cpp.
487+
// RUN: %clang -target x86_64-unknown-linux-gnu \
488+
// RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
489+
// RUN: -x hip %S/Inputs/empty.hip -x c++ %S/Inputs/empty.cpp 2>&1 | FileCheck -check-prefixes=MIXED %s
490+
// RUN: %clang -target x86_64-unknown-linux-gnu \
491+
// RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
492+
// RUN: -x hip %S/Inputs/empty.hip -x c++ %S/Inputs/empty.cpp 2>&1 | FileCheck -check-prefixes=MIXED-NEG %s
493+
494+
// Test compile and link -x hip empty.hip and empty.cpp.
495+
// RUN: %clang -target x86_64-unknown-linux-gnu \
496+
// RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
497+
// RUN: -x hip %S/Inputs/empty.hip %S/Inputs/empty.cpp 2>&1 | FileCheck -check-prefixes=MIXED2 %s
498+
// RUN: %clang -target x86_64-unknown-linux-gnu \
499+
// RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
500+
// RUN: -x hip %S/Inputs/empty.hip %S/Inputs/empty.cpp 2>&1 | FileCheck -check-prefixes=MIXED2-NEG %s
501+
502+
// Test compile and link empty.hip and -x hip empty.cpp.
503+
// RUN: %clang -target x86_64-unknown-linux-gnu \
504+
// RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
505+
// RUN: %S/Inputs/empty.hip -x hip %S/Inputs/empty.cpp 2>&1 | FileCheck -check-prefixes=MIXED2 %s
506+
// RUN: %clang -target x86_64-unknown-linux-gnu \
507+
// RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
508+
// RUN: %S/Inputs/empty.hip -x hip %S/Inputs/empty.cpp 2>&1 | FileCheck -check-prefixes=MIXED2-NEG %s
509+
470510
// MIXED-DAG: input, "{{.*}}empty.hip", hip, (host-hip)
471511
// MIXED-DAG: input, "{{.*}}empty.hip", hip, (device-hip, gfx803)
472512
// MIXED-DAG: input, "{{.*}}empty.hip", hip, (device-hip, gfx900)
473513
// MIXED-DAG: input, "{{.*}}empty.cpp", c++
474514
// MIXED-NEG-NOT: input, "{{.*}}empty.cpp", c++, (host-hip)
475515
// MIXED-NEG-NOT: input, "{{.*}}empty.cpp", c++, (device-hip
516+
517+
// MIXED2-DAG: input, "{{.*}}empty.hip", hip, (host-hip)
518+
// MIXED2-DAG: input, "{{.*}}empty.hip", hip, (device-hip, gfx803)
519+
// MIXED2-DAG: input, "{{.*}}empty.hip", hip, (device-hip, gfx900)
520+
// MIXED2-DAG: input, "{{.*}}empty.cpp", hip, (host-hip)
521+
// MIXED2-DAG: input, "{{.*}}empty.cpp", hip, (device-hip, gfx803)
522+
// MIXED2-DAG: input, "{{.*}}empty.cpp", hip, (device-hip, gfx900)
523+
// MIXED2-NEG-NOT: input, "{{.*}}empty.cpp", c++

0 commit comments

Comments
 (0)