Skip to content

Commit 4d3c010

Browse files
committed
[CUDA] Do not embed a fatbinary when using the new driver
Previously, when using the new driver we created a fatbinary with the PTX and Cubin output. This was mainly done in an attempt to create some backwards compatibility with the existing CUDA support that embeds the fatbinary in each TU. This will most likely be more work than necessary to actually implement. The linker wrapper cannot do anything with these embedded PTX files because we do not know how to link them, and if we did want to include multiple files it should go through the `clang-offload-packager` instead. Also, this didn't respect the setting that disables embedding PTX (although it wasn't used anyway). Reviewed By: tra Differential Revision: https://reviews.llvm.org/D128441
1 parent 22f1273 commit 4d3c010

File tree

4 files changed

+18
-38
lines changed

4 files changed

+18
-38
lines changed

clang/lib/Driver/Driver.cpp

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -4459,17 +4459,6 @@ Action *Driver::BuildOffloadingActions(Compilation &C,
44594459
OffloadAction::DeviceDependences DDep;
44604460
DDep.add(*A, *TCAndArch->first, TCAndArch->second.data(), Kind);
44614461
A = C.MakeAction<OffloadAction>(HDep, DDep);
4462-
} else if (isa<AssembleJobAction>(A) && Kind == Action::OFK_Cuda) {
4463-
// The Cuda toolchain uses fatbinary as the linker phase to bundle the
4464-
// PTX and Cubin output.
4465-
ActionList FatbinActions;
4466-
for (Action *A : {A, A->getInputs()[0]}) {
4467-
OffloadAction::DeviceDependences DDep;
4468-
DDep.add(*A, *TCAndArch->first, TCAndArch->second.data(), Kind);
4469-
FatbinActions.emplace_back(
4470-
C.MakeAction<OffloadAction>(DDep, A->getType()));
4471-
}
4472-
A = C.MakeAction<LinkJobAction>(FatbinActions, types::TY_CUDA_FATBIN);
44734462
}
44744463
++TCAndArch;
44754464
}

clang/lib/Driver/ToolChains/Cuda.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -536,8 +536,9 @@ void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA,
536536
const char *Arch = (II.getType() == types::TY_PP_Asm)
537537
? CudaArchToVirtualArchString(gpu_arch)
538538
: gpu_arch_str;
539-
CmdArgs.push_back(Args.MakeArgString(llvm::Twine("--image=profile=") +
540-
Arch + ",file=" + II.getFilename()));
539+
CmdArgs.push_back(
540+
Args.MakeArgString(llvm::Twine("--image=profile=") + Arch +
541+
",file=" + getToolChain().getInputFilename(II)));
541542
}
542543

543544
for (const auto& A : Args.getAllArgValues(options::OPT_Xcuda_fatbinary))
@@ -695,9 +696,8 @@ CudaToolChain::CudaToolChain(const Driver &D, const llvm::Triple &Triple,
695696

696697
std::string CudaToolChain::getInputFilename(const InputInfo &Input) const {
697698
// Only object files are changed, for example assembly files keep their .s
698-
// extensions. CUDA also continues to use .o as they don't use nvlink but
699-
// fatbinary.
700-
if (!(OK == Action::OFK_OpenMP && Input.getType() == types::TY_Object))
699+
// extensions.
700+
if (Input.getType() != types::TY_Object)
701701
return ToolChain::getInputFilename(Input);
702702

703703
// Replace extension for object files with cubin because nvlink relies on

clang/test/Driver/cuda-openmp-driver.cu

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,11 @@
55
// RUN: --offload-new-driver --offload-arch=sm_35 --offload-arch=sm_70 %s 2>&1 \
66
// RUN: | FileCheck -check-prefix BINDINGS %s
77

8-
// BINDINGS: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[PTX_SM_35:.+]]"
8+
// BINDINGS: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[PTX_SM_35:.+]]"
99
// BINDINGS-NEXT: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[PTX_SM_35]]"], output: "[[CUBIN_SM_35:.+]]"
10-
// BINDINGS-NEXT: "nvptx64-nvidia-cuda" - "NVPTX::Linker", inputs: ["[[CUBIN_SM_35]]", "[[PTX_SM_35]]"], output: "[[FATBIN_SM_35:.+]]"
1110
// BINDINGS-NEXT: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]"], output: "[[PTX_SM_70:.+]]"
1211
// BINDINGS-NEXT: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[PTX_SM_70:.+]]"], output: "[[CUBIN_SM_70:.+]]"
13-
// BINDINGS-NEXT: "nvptx64-nvidia-cuda" - "NVPTX::Linker", inputs: ["[[CUBIN_SM_70]]", "[[PTX_SM_70:.+]]"], output: "[[FATBIN_SM_70:.+]]"
14-
// BINDINGS-NEXT: "x86_64-unknown-linux-gnu" - "Offload::Packager", inputs: ["[[FATBIN_SM_35]]", "[[FATBIN_SM_70]]"], output: "[[BINARY:.+]]"
12+
// BINDINGS-NEXT: "x86_64-unknown-linux-gnu" - "Offload::Packager", inputs: ["[[CUBIN_SM_35]]", "[[CUBIN_SM_70]]"], output: "[[BINARY:.+]]"
1513
// BINDINGS-NEXT: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT]]", "[[BINARY]]"], output: "[[HOST_OBJ:.+]]"
1614
// BINDINGS-NEXT: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out"
1715

@@ -31,7 +29,6 @@
3129

3230
// BINDINGS-DEVICE: # "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[PTX:.+]]"
3331
// BINDINGS-DEVICE: # "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[PTX]]"], output: "[[CUBIN:.+]]"
34-
// BINDINGS-DEVICE: # "nvptx64-nvidia-cuda" - "NVPTX::Linker", inputs: ["[[CUBIN]]", "[[PTX]]"], output: "{{.*}}.fatbin"
3532

3633
// RUN: %clang -### -target x86_64-linux-gnu -nocudalib --cuda-feature=+ptx61 --offload-arch=sm_70 %s 2>&1 | FileCheck -check-prefix MANUAL-FEATURE %s
3734
// MANUAL-FEATURE: -cc1{{.*}}-target-feature{{.*}}+ptx61

clang/test/Driver/cuda-phases.cu

Lines changed: 11 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -232,20 +232,14 @@
232232
// NEW_DRIVER: 6: backend, {5}, assembler, (device-cuda, sm_52)
233233
// NEW_DRIVER: 7: assembler, {6}, object, (device-cuda, sm_52)
234234
// NEW_DRIVER: 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {7}, object
235-
// NEW_DRIVER: 9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {6}, assembler
236-
// NEW_DRIVER: 10: linker, {8, 9}, cuda-fatbin, (device-cuda, sm_52)
237-
// NEW_DRIVER: 11: offload, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {10}, cuda-fatbin
238-
// NEW_DRIVER: 12: input, "[[INPUT]]", cuda, (device-cuda, sm_70)
239-
// NEW_DRIVER: 13: preprocessor, {12}, cuda-cpp-output, (device-cuda, sm_70)
240-
// NEW_DRIVER: 14: compiler, {13}, ir, (device-cuda, sm_70)
241-
// NEW_DRIVER: 15: backend, {14}, assembler, (device-cuda, sm_70)
242-
// NEW_DRIVER: 16: assembler, {15}, object, (device-cuda, sm_70)
243-
// NEW_DRIVER: 17: offload, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {16}, object
244-
// NEW_DRIVER: 18: offload, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {15}, assembler
245-
// NEW_DRIVER: 19: linker, {17, 18}, cuda-fatbin, (device-cuda, sm_70)
246-
// NEW_DRIVER: 20: offload, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {19}, cuda-fatbin
247-
// NEW_DRIVER: 21: clang-offload-packager, {11, 20}, image
248-
// NEW_DRIVER: 22: offload, " (powerpc64le-ibm-linux-gnu)" {2}, " (powerpc64le-ibm-linux-gnu)" {21}, ir
249-
// NEW_DRIVER: 23: backend, {22}, assembler, (host-cuda)
250-
// NEW_DRIVER: 24: assembler, {23}, object, (host-cuda)
251-
// NEW_DRIVER: 25: clang-linker-wrapper, {24}, image, (host-cuda)
235+
// NEW_DRIVER: 9: input, "[[INPUT]]", cuda, (device-cuda, sm_70)
236+
// NEW_DRIVER: 10: preprocessor, {9}, cuda-cpp-output, (device-cuda, sm_70)
237+
// NEW_DRIVER: 11: compiler, {10}, ir, (device-cuda, sm_70)
238+
// NEW_DRIVER: 12: backend, {11}, assembler, (device-cuda, sm_70)
239+
// NEW_DRIVER: 13: assembler, {12}, object, (device-cuda, sm_70)
240+
// NEW_DRIVER: 14: offload, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {13}, object
241+
// NEW_DRIVER: 15: clang-offload-packager, {8, 14}, image
242+
// NEW_DRIVER: 16: offload, " (powerpc64le-ibm-linux-gnu)" {2}, " (powerpc64le-ibm-linux-gnu)" {15}, ir
243+
// NEW_DRIVER: 17: backend, {16}, assembler, (host-cuda)
244+
// NEW_DRIVER: 18: assembler, {17}, object, (host-cuda)
245+
// NEW_DRIVER: 19: clang-linker-wrapper, {18}, image, (host-cuda)

0 commit comments

Comments
 (0)