Skip to content

Revert "[HIP] use offload wrapper for non-device-only non-rdc (#132869)" #143432

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 9, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions clang/lib/CodeGen/CGCUDANV.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1280,8 +1280,7 @@ llvm::Function *CGNVCUDARuntime::finalizeModule() {
return nullptr;
}
if (CGM.getLangOpts().OffloadViaLLVM ||
(CGM.getLangOpts().OffloadingNewDriver &&
(CGM.getLangOpts().HIP || RelocatableDeviceCode)))
(CGM.getLangOpts().OffloadingNewDriver && RelocatableDeviceCode))
createOffloadingEntries();
else
return makeModuleCtorFunction();
Expand Down
59 changes: 18 additions & 41 deletions clang/lib/Driver/Driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4424,10 +4424,6 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args,
options::OPT_no_offload_new_driver,
C.isOffloadingHostKind(Action::OFK_Cuda));

bool HIPNoRDC =
C.isOffloadingHostKind(Action::OFK_HIP) &&
!Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false);

// Builder to be used to build offloading actions.
std::unique_ptr<OffloadingActionBuilder> OffloadBuilder =
!UseNewOffloadingDriver
Expand Down Expand Up @@ -4561,7 +4557,7 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args,
// Check if this Linker Job should emit a static library.
if (ShouldEmitStaticLibrary(Args)) {
LA = C.MakeAction<StaticLibJobAction>(LinkerInputs, types::TY_Image);
} else if ((UseNewOffloadingDriver && !HIPNoRDC) ||
} else if (UseNewOffloadingDriver ||
Args.hasArg(options::OPT_offload_link)) {
LA = C.MakeAction<LinkerWrapperJobAction>(LinkerInputs, types::TY_Image);
LA->propagateHostOffloadInfo(C.getActiveOffloadKinds(),
Expand Down Expand Up @@ -4872,28 +4868,10 @@ Action *Driver::BuildOffloadingActions(Compilation &C,
const InputTy &Input, StringRef CUID,
Action *HostAction) const {
// Don't build offloading actions if explicitly disabled or we do not have a
// valid source input.
if (offloadHostOnly() || !types::isSrcFile(Input.first))
return HostAction;

bool HIPNoRDC =
C.isOffloadingHostKind(Action::OFK_HIP) &&
!Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false);

// For HIP non-rdc non-device-only compilation, create a linker wrapper
// action for each host object to link, bundle and wrap device files in
// it.
if (isa<AssembleJobAction>(HostAction) && HIPNoRDC && !offloadDeviceOnly()) {
ActionList AL{HostAction};
HostAction = C.MakeAction<LinkerWrapperJobAction>(AL, types::TY_Object);
HostAction->propagateHostOffloadInfo(C.getActiveOffloadKinds(),
/*BoundArch=*/nullptr);
return HostAction;
}

// Don't build offloading actions if we do not have a compile action. If
// preprocessing only ignore embedding.
if (!(isa<CompileJobAction>(HostAction) ||
// valid source input and compile action to embed it in. If preprocessing only
// ignore embedding.
if (offloadHostOnly() || !types::isSrcFile(Input.first) ||
!(isa<CompileJobAction>(HostAction) ||
getFinalPhase(Args) == phases::Preprocess))
return HostAction;

Expand Down Expand Up @@ -4989,12 +4967,12 @@ Action *Driver::BuildOffloadingActions(Compilation &C,
}
}

// Compiling HIP in device-only non-RDC mode requires linking each action
// individually.
// Compiling HIP in non-RDC mode requires linking each action individually.
for (Action *&A : DeviceActions) {
if ((A->getType() != types::TY_Object &&
A->getType() != types::TY_LTO_BC) ||
!HIPNoRDC || !offloadDeviceOnly())
Kind != Action::OFK_HIP ||
Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false))
continue;
ActionList LinkerInput = {A};
A = C.MakeAction<LinkJobAction>(LinkerInput, types::TY_Image);
Expand All @@ -5018,12 +4996,12 @@ Action *Driver::BuildOffloadingActions(Compilation &C,
}
}

// HIP code in device-only non-RDC mode will bundle the output if it invoked
// the linker.
// HIP code in non-RDC mode will bundle the output if it invoked the linker.
bool ShouldBundleHIP =
HIPNoRDC && offloadDeviceOnly() &&
C.isOffloadingHostKind(Action::OFK_HIP) &&
Args.hasFlag(options::OPT_gpu_bundle_output,
options::OPT_no_gpu_bundle_output, true) &&
!Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false) &&
!llvm::any_of(OffloadActions,
[](Action *A) { return A->getType() != types::TY_Image; });

Expand All @@ -5043,9 +5021,11 @@ Action *Driver::BuildOffloadingActions(Compilation &C,
C.MakeAction<LinkJobAction>(OffloadActions, types::TY_CUDA_FATBIN);
DDep.add(*FatbinAction, *C.getSingleOffloadToolChain<Action::OFK_Cuda>(),
nullptr, Action::OFK_Cuda);
} else if (HIPNoRDC && offloadDeviceOnly()) {
// If we are in device-only non-RDC-mode we just emit the final HIP
// fatbinary for each translation unit, linking each input individually.
} else if (C.isOffloadingHostKind(Action::OFK_HIP) &&
!Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc,
false)) {
// If we are not in RDC-mode we just emit the final HIP fatbinary for each
// translation unit, linking each input individually.
Action *FatbinAction =
C.MakeAction<LinkJobAction>(OffloadActions, types::TY_HIP_FATBIN);
DDep.add(*FatbinAction, *C.getSingleOffloadToolChain<Action::OFK_HIP>(),
Expand Down Expand Up @@ -5198,11 +5178,8 @@ Action *Driver::ConstructPhaseAction(
(((Input->getOffloadingToolChain() &&
Input->getOffloadingToolChain()->getTriple().isAMDGPU()) ||
TargetDeviceOffloadKind == Action::OFK_HIP) &&
((Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc,
false) ||
(Args.hasFlag(options::OPT_offload_new_driver,
options::OPT_no_offload_new_driver, false) &&
!offloadDeviceOnly())) ||
(Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc,
false) ||
TargetDeviceOffloadKind == Action::OFK_OpenMP))) {
types::ID Output =
Args.hasArg(options::OPT_S) &&
Expand Down
18 changes: 3 additions & 15 deletions clang/lib/Driver/ToolChains/Clang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7821,7 +7821,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
CmdArgs.push_back("-fcuda-include-gpubinary");
CmdArgs.push_back(CudaDeviceInput->getFilename());
} else if (!HostOffloadingInputs.empty()) {
if (IsCuda && !IsRDCMode) {
if ((IsCuda || IsHIP) && !IsRDCMode) {
assert(HostOffloadingInputs.size() == 1 && "Only one input expected");
CmdArgs.push_back("-fcuda-include-gpubinary");
CmdArgs.push_back(HostOffloadingInputs.front().getFilename());
Expand Down Expand Up @@ -9368,20 +9368,8 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA,
// Add the linker arguments to be forwarded by the wrapper.
CmdArgs.push_back(Args.MakeArgString(Twine("--linker-path=") +
LinkCommand->getExecutable()));

// We use action type to differentiate two use cases of the linker wrapper.
// TY_Image for normal linker wrapper work.
// TY_Object for HIP fno-gpu-rdc embedding device binary in a relocatable
// object.
assert(JA.getType() == types::TY_Object || JA.getType() == types::TY_Image);
if (JA.getType() == types::TY_Object) {
CmdArgs.append({"-o", Output.getFilename()});
for (auto Input : Inputs)
CmdArgs.push_back(Input.getFilename());
CmdArgs.push_back("-r");
} else
for (const char *LinkArg : LinkCommand->getArguments())
CmdArgs.push_back(LinkArg);
for (const char *LinkArg : LinkCommand->getArguments())
CmdArgs.push_back(LinkArg);

addOffloadCompressArgs(Args, CmdArgs);

Expand Down
6 changes: 3 additions & 3 deletions clang/test/Driver/hip-binding.hip
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@
// RUN: -nogpulib -nogpuinc -foffload-lto --offload-arch=gfx90a --offload-arch=gfx908 -c %s 2>&1 \
// RUN: | FileCheck -check-prefix=LTO-NO-RDC %s
// LTO-NO-RDC: # "amdgcn-amd-amdhsa" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[LTO_908:.+]]"
// LTO-NO-RDC-NEXT: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[LTO_908]]"], output: "[[OBJ_908:.+]]"
// LTO-NO-RDC-NEXT: # "amdgcn-amd-amdhsa" - "clang", inputs: ["[[INPUT]]"], output: "[[LTO_90A:.+]]"
// LTO-NO-RDC-NEXT: # "x86_64-unknown-linux-gnu" - "Offload::Packager", inputs: ["[[LTO_908]]", "[[LTO_90A]]"], output: "[[PKG:.+]]"
// LTO-NO-RDC-NEXT: # "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT]]", "[[PKG]]"], output: "[[OBJ:.+]]"
// LTO-NO-RDC-NEXT: # "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[OBJ]]"], output: "hip-binding.o"
// LTO-NO-RDC-NEXT: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[LTO_90A]]"], output: "[[OBJ_90A:.+]]"
// LTO-NO-RDC-NEXT: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[OBJ_908]]", "[[OBJ_90A]]"], output: "[[HIPFB:.+]]"
51 changes: 20 additions & 31 deletions clang/test/Driver/hip-phases.hip
Original file line number Diff line number Diff line change
Expand Up @@ -8,50 +8,39 @@
//
// RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
// RUN: --no-offload-new-driver --cuda-gpu-arch=gfx803 %s 2>&1 \
// RUN: | FileCheck -check-prefixes=BIN,OLD,OLDN %s
// RUN: | FileCheck -check-prefixes=BIN,NRD,OLD %s
// RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
// RUN: --offload-new-driver --cuda-gpu-arch=gfx803 %s 2>&1 \
// RUN: | FileCheck -check-prefixes=BIN,NEW,NEWN %s
// RUN: | FileCheck -check-prefixes=BIN,NRD,NEW %s
//
// RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
// RUN: --no-offload-new-driver --cuda-gpu-arch=gfx803 -fgpu-rdc %s 2>&1 \
// RUN: | FileCheck -check-prefixes=BIN,OLD,OLDR %s
// RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
// RUN: --offload-new-driver --cuda-gpu-arch=gfx803 -fgpu-rdc %s 2>&1 \
// RUN: | FileCheck -check-prefixes=BIN,NEW,NEWR %s
// RUN: | FileCheck -check-prefixes=BIN,RDC %s
//
// BIN-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (host-[[T]])
// BIN-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]])
// BIN-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]])
// OLDR-DAG: [[P12:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]])
// OLDR-DAG: [[P13:[0-9]+]]: assembler, {[[P12]]}, object, (host-[[T]])
// RDC-DAG: [[P12:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]])
// RDC-DAG: [[P13:[0-9]+]]: assembler, {[[P12]]}, object, (host-[[T]])

// BIN-DAG: [[P3:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH:gfx803]])
// BIN-DAG: [[P4:[0-9]+]]: preprocessor, {[[P3]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
// BIN-DAG: [[P5:[0-9]+]]: compiler, {[[P4]]}, ir, (device-[[T]], [[ARCH]])
// OLDN-DAG: [[P6:[0-9]+]]: backend, {[[P5]]}, assembler, (device-[[T]], [[ARCH]])
// NEW-DAG: [[P6:[0-9]+]]: backend, {[[P5]]}, ir, (device-[[T]], [[ARCH]])
// OLDN-DAG: [[P7:[0-9]+]]: assembler, {[[P6]]}, object, (device-[[T]], [[ARCH]])
// OLDR-DAG: [[P7:[0-9]+]]: backend, {[[P5]]}, ir, (device-[[T]], [[ARCH]])
// OLD-DAG: [[P8:[0-9]+]]: linker, {[[P7]]}, image, (device-[[T]], [[ARCH]])
// OLD-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P8]]}, image
// NEW-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P6]]}, ir
// OLDN-DAG: [[P10:[0-9]+]]: linker, {[[P9]]}, hip-fatbin, (device-[[T]])
// NEW-DAG: [[P10:[0-9]+]]: clang-offload-packager, {[[P9]]}, image, (device-[[T]])
// OLDR-DAG: [[P10:[0-9]+]]: linker, {[[P9]]}, object, (device-[[T]])

// OLDN-DAG: [[P11:[0-9]+]]: offload, "host-[[T]] (x86_64-unknown-linux-gnu)" {[[P2]]}, "device-[[T]] (amdgcn-amd-amdhsa)" {[[P10]]}, ir
// NEW-DAG: [[P11:[0-9]+]]: offload, "host-[[T]] (x86_64-unknown-linux-gnu)" {[[P2]]}, "device-[[T]] (x86_64-unknown-linux-gnu)" {[[P10]]}, ir
// OLDR-DAG: [[P11:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa)" {[[P10]]}, object
// OLDN-DAG: [[P12:[0-9]+]]: backend, {[[P11]]}, assembler, (host-[[T]])
// OLDN-DAG: [[P13:[0-9]+]]: assembler, {[[P12]]}, object, (host-[[T]])
// NEW-DAG: [[P12:[0-9]+]]: backend, {[[P11]]}, assembler, (host-[[T]])
// NEW-DAG: [[P13:[0-9]+]]: assembler, {[[P12]]}, object, (host-[[T]])
// OLDN-DAG: [[P14:[0-9]+]]: linker, {[[P13]]}, image, (host-[[T]])
// NEWN-DAG: [[P14:[0-9]+]]: clang-linker-wrapper, {[[P13]]}, object, (host-[[T]])
// OLDR-DAG: [[P14:[0-9]+]]: linker, {[[P13]], [[P11]]}, image, (host-[[T]])
// NEWR-DAG: [[P14:[0-9]+]]: clang-linker-wrapper, {[[P13]]}, image, (host-[[T]])
// NEWN-DAG: [[P15:[0-9]+]]: linker, {[[P14]]}, image
// NRD-DAG: [[P6:[0-9]+]]: backend, {[[P5]]}, assembler, (device-[[T]], [[ARCH]])
// NRD-DAG: [[P7:[0-9]+]]: assembler, {[[P6]]}, object, (device-[[T]], [[ARCH]])
// RDC-DAG: [[P7:[0-9]+]]: backend, {[[P5]]}, ir, (device-[[T]], [[ARCH]])
// BIN-DAG: [[P8:[0-9]+]]: linker, {[[P7]]}, image, (device-[[T]], [[ARCH]])
// BIN-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P8]]}, image
// NRD-DAG: [[P10:[0-9]+]]: linker, {[[P9]]}, hip-fatbin, (device-[[T]])
// RDC-DAG: [[P10:[0-9]+]]: linker, {[[P9]]}, object, (device-[[T]])

// NRD-DAG: [[P11:[0-9]+]]: offload, "host-[[T]] (x86_64-unknown-linux-gnu)" {[[P2]]}, "device-[[T]] (amdgcn-amd-amdhsa)" {[[P10]]}, ir
// RDC-DAG: [[P11:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa)" {[[P10]]}, object
// NRD-DAG: [[P12:[0-9]+]]: backend, {[[P11]]}, assembler, (host-[[T]])
// NRD-DAG: [[P13:[0-9]+]]: assembler, {[[P12]]}, object, (host-[[T]])
// OLD-DAG: [[P14:[0-9]+]]: linker, {[[P13]]}, image, (host-[[T]])
// NEW-DAG: [[P14:[0-9]+]]: clang-linker-wrapper, {[[P13]]}, image, (host-[[T]])
// RDC-DAG: [[P14:[0-9]+]]: linker, {[[P13]], [[P11]]}, image, (host-[[T]])

//
// Test single gpu architecture up to the assemble phase.
Expand Down
Loading
Loading