Skip to content

Commit a89ae41

Browse files
[SYCL] Extend -fsycl-device-obj to dump assembly (#17390)
This patch adds an `asm` value to `-fsycl-device-obj` to enable dumping the assembly (or PTX) of kernels. --------- Co-authored-by: Werner, Stefan <[email protected]>
1 parent fc114b0 commit a89ae41

File tree

5 files changed

+64
-6
lines changed

5 files changed

+64
-6
lines changed

clang/include/clang/Basic/DiagnosticDriverKinds.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -427,6 +427,10 @@ def err_drv_fsycl_wrong_optimization_options : Error<
427427
def warn_drv_fsycl_add_default_spec_consts_image_flag_in_non_AOT : Warning<
428428
"-fsycl-add-default-spec-consts-image flag has an effect only in Ahead of Time Compilation mode (AOT)">,
429429
InGroup<SyclTarget>;
430+
def warn_drv_fsycl_device_obj_asm_device_only : Warning<
431+
"-fsycl-device-obj=asm flag has an effect only when compiling device code "
432+
"and emitting assembly, make sure both -fsycl-device-only and -S flags are "
433+
"present; will be ignored">, InGroup<UnusedCommandLineArgument>;
430434
def warn_drv_ftarget_register_alloc_mode_pvc : Warning<
431435
"using '%0' to set GRF mode on PVC hardware is deprecated; use '-ftarget-register-alloc-mode=pvc:%1'">,
432436
InGroup<Deprecated>;

clang/include/clang/Driver/Options.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7114,8 +7114,8 @@ defm sycl_id_queries_fit_in_int: BoolFOption<"sycl-id-queries-fit-in-int",
71147114
BothFlags<[], [ClangOption, CLOption, CC1Option], " that SYCL ID queries fit "
71157115
"within MAX_INT.">>;
71167116
def fsycl_device_obj_EQ : Joined<["-"], "fsycl-device-obj=">,
7117-
Values<"spirv,llvmir">, HelpText<"Specify format of device code stored in "
7118-
"the resulting object. Valid values are: spirv, llvmir (default)">;
7117+
Values<"spirv,llvmir,asm">, HelpText<"Specify format of device code stored "
7118+
"in the resulting object. Valid values are: spirv, asm, llvmir (default)">;
71197119
def fsycl_use_bitcode : Flag<["-"], "fsycl-use-bitcode">,
71207120
Alias<fsycl_device_obj_EQ>, AliasArgs<["llvmir"]>, Flags<[Deprecated]>,
71217121
HelpText<"Use LLVM bitcode instead of SPIR-V in fat objects (deprecated)">;

clang/lib/Driver/Driver.cpp

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1330,17 +1330,21 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
13301330
C.getInputArgs().getLastArg(options::OPT_fsycl_range_rounding_EQ);
13311331
checkSingleArgValidity(RangeRoundingPreference, {"disable", "force", "on"});
13321332

1333-
// Evaluation of -fsycl-device-obj is slightly different, we will emit
1334-
// a warning and inform the user of the default behavior used.
1333+
// Evaluation of -fsycl-device-obj is slightly different, we will emit a
1334+
// warning and inform the user of the default behavior used.
13351335
// TODO: General usage of this option is to check for 'spirv' and fallthrough
13361336
// to using llvmir. This can be improved to be more obvious in usage.
13371337
if (Arg *DeviceObj = C.getInputArgs().getLastArgNoClaim(
13381338
options::OPT_fsycl_device_obj_EQ)) {
1339+
const bool SYCLDeviceOnly = C.getDriver().offloadDeviceOnly();
1340+
const bool EmitAsm = C.getInputArgs().getLastArgNoClaim(options::OPT_S);
13391341
StringRef ArgValue(DeviceObj->getValue());
1340-
SmallVector<StringRef, 2> DeviceObjValues = {"spirv", "llvmir"};
1342+
SmallVector<StringRef, 3> DeviceObjValues = {"spirv", "llvmir", "asm"};
13411343
if (llvm::find(DeviceObjValues, ArgValue) == DeviceObjValues.end())
13421344
Diag(clang::diag::warn_ignoring_value_using_default)
13431345
<< DeviceObj->getSpelling().split('=').first << ArgValue << "llvmir";
1346+
else if (ArgValue == "asm" && (!SYCLDeviceOnly || !EmitAsm))
1347+
Diag(clang::diag::warn_drv_fsycl_device_obj_asm_device_only);
13441348
}
13451349

13461350
Arg *SYCLForceTarget =
@@ -1569,6 +1573,7 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
15691573
addSYCLDefaultTriple(C, UniqueSYCLTriplesVec);
15701574
}
15711575
}
1576+
15721577
// -fno-sycl-libspirv flag is reserved for very unusual cases where the
15731578
// libspirv library is not linked when using CUDA/HIP: so output appropriate
15741579
// warnings.
@@ -5347,6 +5352,15 @@ class OffloadingActionBuilder final {
53475352
types::TY_SPIRV);
53485353
if (SYCLDeviceOnly)
53495354
continue;
5355+
} else if (SYCLDeviceOnly && Args.hasArg(options::OPT_S) &&
5356+
Args.getLastArgValue(options::OPT_fsycl_device_obj_EQ)
5357+
.equals_insensitive("asm")) {
5358+
auto *CompileAction =
5359+
C.MakeAction<CompileJobAction>(A, types::TY_LLVM_BC);
5360+
A = C.MakeAction<BackendJobAction>(CompileAction, types::TY_PP_Asm);
5361+
5362+
if (SYCLDeviceOnly)
5363+
continue;
53505364
} else {
53515365
if (Args.hasArg(options::OPT_fsyntax_only))
53525366
OutputType = types::TY_Nothing;
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
///
2+
/// Check that we call into the backend assembler when using `asm` as device
3+
/// object format, namely:
4+
/// `backend, {2}, assembler, (device-sycl, ...)`
5+
6+
// REQUIRES: nvptx-registered-target,amdgpu-registered-target
7+
8+
/// Check -fsycl-device-obj=asm for AMD.
9+
// RUN: %clang -fsycl-device-only -fsycl -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend=amdgcn-amd-amdhsa --offload-arch=gfx90a -fsycl-device-obj=asm -S %s 2>&1 -ccc-print-phases -o - | FileCheck %s --check-prefix=CHECK-AMD
10+
// CHECK-AMD: 0: input, "{{.+\.cpp}}", c++, (device-sycl, gfx90a)
11+
// CHECK-AMD: 1: preprocessor, {0}, c++-cpp-output, (device-sycl, gfx90a)
12+
// CHECK-AMD: 2: compiler, {1}, ir, (device-sycl, gfx90a)
13+
// CHECK-AMD: 3: backend, {2}, assembler, (device-sycl, gfx90a)
14+
// CHECK-AMD: 4: offload, "device-sycl (amdgcn-amd-amdhsa:gfx90a)" {3}, assembler
15+
16+
/// Check -fsycl-device-obj=asm for Nvidia.
17+
// RUN: %clang -fsycl-device-only -fsycl -fsycl-targets=nvptx64-nvidia-cuda -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_50 -fsycl-device-obj=asm -S %s 2>&1 -ccc-print-phases -o - | FileCheck %s --check-prefix=CHECK-PTX
18+
// CHECK-PTX: 0: input, "{{.+\.cpp}}", c++, (device-sycl, sm_50)
19+
// CHECK-PTX: 1: preprocessor, {0}, c++-cpp-output, (device-sycl, sm_50)
20+
// CHECK-PTX: 2: compiler, {1}, ir, (device-sycl, sm_50)
21+
// CHECK-PTX: 3: backend, {2}, assembler, (device-sycl, sm_50)
22+
// CHECK-PTX: 4: offload, "device-sycl (nvptx64-nvidia-cuda:sm_50)" {3}, assembler
23+
24+
/// Check -fsycl-device-obj option when emitting llvm IR.
25+
// RUN: %clang -fsycl-device-only -fsycl -fsycl-targets=nvptx64-nvidia-cuda -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_50 -fsycl-device-obj=llvmir -S %s 2>&1 -ccc-print-phases -o - | FileCheck %s --check-prefix=CHECK-LLVMIR
26+
// CHECK-LLVMIR: 0: input, "{{.+\.cpp}}", c++, (device-sycl, sm_50)
27+
// CHECK-LLVMIR: 1: preprocessor, {0}, c++-cpp-output, (device-sycl, sm_50)
28+
// CHECK-LLVMIR: 2: compiler, {1}, ir, (device-sycl, sm_50)
29+
// CHECK-LLVMIR: 3: offload, "device-sycl (nvptx64-nvidia-cuda:sm_50)" {2}, ir
30+
31+
/// -fsycl-device-obj=asm should always be accompanied by -fsycl-device-only
32+
/// and -S, check that the compiler issues a correct warning message:
33+
// RUN: %clang -nocudalib -fsycl-device-only -fsycl -fsycl-targets=nvptx64-nvidia-cuda -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_50 -fsycl-device-obj=asm %s 2>&1 -o - | FileCheck %s --check-prefix=CHECK-NO-DEV-ONLY-NO-S
34+
// CHECK-NO-DEV-ONLY-NO-S: warning: -fsycl-device-obj=asm flag has an effect only when compiling device code and emitting assembly, make sure both -fsycl-device-only and -S flags are present; will be ignored [-Wunused-command-line-argument]
35+
36+
/// -fsycl-device-obj=asm will finish at the assembly generation stage, hence
37+
/// inform users that generating library will not be possible (ignore -c)
38+
// RUN: %clang -nocudalib -fsycl-device-only -fsycl -fsycl-targets=nvptx64-nvidia-cuda -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_50 -fsycl-device-obj=asm %s 2>&1 -fsycl-device-only -S -c -o - | FileCheck %s --check-prefix=CHECK-DASH-C-IGNORE
39+
// CHECK-DASH-C-IGNORE: warning: argument unused during compilation: '-c' [-Wunused-command-line-argument]

sycl/doc/UsersManual.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -348,7 +348,8 @@ and not recommended to use in production environment.
348348
**`-fsycl-device-obj=<arg>`** [EXPERIMENTAL]
349349

350350
Specify format of device code stored in the resulting object. The <arg> can
351-
be one of the following: "spirv" - SPIR-V is emitted, "llvmir" - LLVM-IR
351+
be one of the following: "spirv" - SPIR-V, "asm" - assembly output when
352+
possible (PTX, when targeting Nvidia devices), or "llvmir" - LLVM-IR
352353
bitcode format is emitted (default).
353354

354355
**`-fsycl-help[=backend]`**

0 commit comments

Comments
 (0)