Skip to content

Commit fdd40f7

Browse files
committed
[CUDA] Include PTX in non-RDC mode using the new driver
Summary: The old driver embeds PTX in non-RDC mode, and so does the `nvcc` compiler. The new driver currently does not do this, so we should keep it consistent in this case. This simply requires adding the PTX assembly output (the input to the assembler action) as an additional input to the offloading action that gets fed to fatbin.
1 parent e9901d8 commit fdd40f7

File tree

2 files changed

+21
-12
lines changed

2 files changed

+21
-12
lines changed

clang/lib/Driver/Driver.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4625,7 +4625,15 @@ Action *Driver::BuildOffloadingActions(Compilation &C,
46254625
DDeps.add(*A, *TCAndArch->first, TCAndArch->second.data(), Kind);
46264626
OffloadAction::DeviceDependences DDep;
46274627
DDep.add(*A, *TCAndArch->first, TCAndArch->second.data(), Kind);
4628+
4629+
// Compiling CUDA in non-RDC mode uses the PTX output if available.
4630+
for (Action *Input : A->getInputs())
4631+
if (Kind == Action::OFK_Cuda && A->getType() == types::TY_Object &&
4632+
!Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc,
4633+
false))
4634+
DDep.add(*Input, *TCAndArch->first, TCAndArch->second.data(), Kind);
46284635
OffloadActions.push_back(C.MakeAction<OffloadAction>(DDep, A->getType()));
4636+
46294637
++TCAndArch;
46304638
}
46314639
}

clang/test/Driver/cuda-phases.cu

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -244,31 +244,32 @@
244244
// NEW-DRIVER-RDC-NEXT: 18: assembler, {17}, object, (host-cuda)
245245
// NEW-DRIVER-RDC-NEXT: 19: clang-linker-wrapper, {18}, image, (host-cuda)
246246

247-
// RUN: %clang -### -target powerpc64le-ibm-linux-gnu -ccc-print-phases --offload-new-driver -fgpu-rdc \
247+
// RUN: %clang -### -target powerpc64le-ibm-linux-gnu -ccc-print-phases --offload-new-driver \
248248
// RUN: --offload-arch=sm_52 --offload-arch=sm_70 %s 2>&1 | FileCheck --check-prefix=NEW-DRIVER %s
249-
// NEW-DRIVER: 0: input, "[[INPUT:.+]]", cuda
250-
// NEW-DRIVER-NEXT: 1: preprocessor, {0}, cuda-cpp-output
251-
// NEW-DRIVER-NEXT: 2: compiler, {1}, ir
252-
// NEW-DRIVER-NEXT: 3: input, "[[INPUT]]", cuda, (device-cuda, sm_52)
249+
// NEW-DRIVER: 0: input, "[[CUDA:.+]]", cuda, (host-cuda)
250+
// NEW-DRIVER-NEXT: 1: preprocessor, {0}, cuda-cpp-output, (host-cuda)
251+
// NEW-DRIVER-NEXT: 2: compiler, {1}, ir, (host-cuda)
252+
// NEW-DRIVER-NEXT: 3: input, "[[CUDA]]", cuda, (device-cuda, sm_52)
253253
// NEW-DRIVER-NEXT: 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_52)
254254
// NEW-DRIVER-NEXT: 5: compiler, {4}, ir, (device-cuda, sm_52)
255255
// NEW-DRIVER-NEXT: 6: backend, {5}, assembler, (device-cuda, sm_52)
256256
// NEW-DRIVER-NEXT: 7: assembler, {6}, object, (device-cuda, sm_52)
257-
// NEW-DRIVER-NEXT: 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {7}, object
258-
// NEW-DRIVER-NEXT: 9: input, "[[INPUT]]", cuda, (device-cuda, sm_70)
257+
// NEW-DRIVER-NEXT: 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {7}, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {6}, object
258+
// NEW-DRIVER-NEXT: 9: input, "[[CUDA]]", cuda, (device-cuda, sm_70)
259259
// NEW-DRIVER-NEXT: 10: preprocessor, {9}, cuda-cpp-output, (device-cuda, sm_70)
260260
// NEW-DRIVER-NEXT: 11: compiler, {10}, ir, (device-cuda, sm_70)
261261
// NEW-DRIVER-NEXT: 12: backend, {11}, assembler, (device-cuda, sm_70)
262262
// NEW-DRIVER-NEXT: 13: assembler, {12}, object, (device-cuda, sm_70)
263-
// NEW-DRIVER-NEXT: 14: offload, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {13}, object
264-
// NEW-DRIVER-NEXT: 15: clang-offload-packager, {8, 14}, image
265-
// NEW-DRIVER-NEXT: 16: offload, "host-cuda (powerpc64le-ibm-linux-gnu)" {2}, "device-cuda (powerpc64le-ibm-linux-gnu)" {15}, ir
263+
// NEW-DRIVER-NEXT: 14: offload, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {13}, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {12}, object
264+
// NEW-DRIVER-NEXT: 15: linker, {8, 14}, cuda-fatbin, (device-cuda)
265+
// NEW-DRIVER-NEXT: 16: offload, "host-cuda (powerpc64le-ibm-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {15}, ir
266266
// NEW-DRIVER-NEXT: 17: backend, {16}, assembler, (host-cuda)
267267
// NEW-DRIVER-NEXT: 18: assembler, {17}, object, (host-cuda)
268268
// NEW-DRIVER-NEXT: 19: clang-linker-wrapper, {18}, image, (host-cuda)
269269

270270
// RUN: %clang -### --target=powerpc64le-ibm-linux-gnu -ccc-print-phases --offload-new-driver \
271271
// RUN: --offload-arch=sm_52 --offload-arch=sm_70 %s %S/Inputs/empty.cpp 2>&1 | FileCheck --check-prefix=NON-CUDA-INPUT %s
272+
272273
// NON-CUDA-INPUT: 0: input, "[[CUDA:.+]]", cuda, (host-cuda)
273274
// NON-CUDA-INPUT-NEXT: 1: preprocessor, {0}, cuda-cpp-output, (host-cuda)
274275
// NON-CUDA-INPUT-NEXT: 2: compiler, {1}, ir, (host-cuda)
@@ -277,13 +278,13 @@
277278
// NON-CUDA-INPUT-NEXT: 5: compiler, {4}, ir, (device-cuda, sm_52)
278279
// NON-CUDA-INPUT-NEXT: 6: backend, {5}, assembler, (device-cuda, sm_52)
279280
// NON-CUDA-INPUT-NEXT: 7: assembler, {6}, object, (device-cuda, sm_52)
280-
// NON-CUDA-INPUT-NEXT: 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {7}, object
281+
// NON-CUDA-INPUT-NEXT: 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {7}, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {6}, object
281282
// NON-CUDA-INPUT-NEXT: 9: input, "[[CUDA]]", cuda, (device-cuda, sm_70)
282283
// NON-CUDA-INPUT-NEXT: 10: preprocessor, {9}, cuda-cpp-output, (device-cuda, sm_70)
283284
// NON-CUDA-INPUT-NEXT: 11: compiler, {10}, ir, (device-cuda, sm_70)
284285
// NON-CUDA-INPUT-NEXT: 12: backend, {11}, assembler, (device-cuda, sm_70)
285286
// NON-CUDA-INPUT-NEXT: 13: assembler, {12}, object, (device-cuda, sm_70)
286-
// NON-CUDA-INPUT-NEXT: 14: offload, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {13}, object
287+
// NON-CUDA-INPUT-NEXT: 14: offload, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {13}, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {12}, object
287288
// NON-CUDA-INPUT-NEXT: 15: linker, {8, 14}, cuda-fatbin, (device-cuda)
288289
// NON-CUDA-INPUT-NEXT: 16: offload, "host-cuda (powerpc64le-ibm-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {15}, ir
289290
// NON-CUDA-INPUT-NEXT: 17: backend, {16}, assembler, (host-cuda)

0 commit comments

Comments
 (0)