Skip to content

Commit bf8ea96

Browse files
authored
[SYCL] Add extension and implement fp control kernel property (#11591)
1 parent b334e1a commit bf8ea96

File tree

13 files changed

+917
-16
lines changed

13 files changed

+917
-16
lines changed

llvm/lib/SYCLLowerIR/CMakeLists.txt

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,11 @@ endif()
1313
if (NOT TARGET LLVMGenXIntrinsics)
1414
if (NOT DEFINED LLVMGenXIntrinsics_SOURCE_DIR)
1515
set(LLVMGenXIntrinsics_GIT_REPO https://github.com/intel/vc-intrinsics.git)
16-
# Author: Jinsong Ji <[email protected]>
17-
# Date: Thu Aug 10 14:41:52 2023 +0000
18-
# Guard removed typed pointer enum within version macro
19-
set(LLVMGenXIntrinsics_GIT_TAG 17a53f4304463b8e7e639d57ef17479040a8a2ad)
16+
# Author: Artur Gainullin <[email protected]>
17+
# Date: Thu Nov 9 00:37:24 2023 +0000
18+
19+
# Replace old kernel with rewritten kernel in metadata only since LLVM 17
20+
set(LLVMGenXIntrinsics_GIT_TAG a8403355ada112b72d1fc7db29fd04325eecee60)
2021

2122
message(STATUS "vc-intrinsics repo is missing. Will try to download it from ${LLVMGenXIntrinsics_GIT_REPO}")
2223
include(FetchContent)

llvm/lib/SYCLLowerIR/CompileTimePropertiesPass.cpp

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,36 @@ const StringMap<Decor> SpirvDecorMap = {
6565
};
6666
#undef SYCL_COMPILE_TIME_PROPERTY
6767

68+
// Bit flags describing the floating-point controls requested for a kernel.
// Keep these values in sync with the masks in the SYCL header that defines the
// fp control kernel property.
enum FloatControl {
  // Rounding modes.
  RTE = 0x01, // Round to nearest or even
  RTP = 0x02, // Round towards +ve inf
  RTN = 0x04, // Round towards -ve inf
  RTZ = 0x08, // Round towards zero

  // Denormal handling.
  DENORM_FTZ = 0x10,      // Denorm mode flush to zero
  DENORM_D_ALLOW = 0x20,  // Denorm mode double allow
  DENORM_F_ALLOW = 0x40,  // Denorm mode float allow
  DENORM_HF_ALLOW = 0x80  // Denorm mode half allow
};
81+
82+
enum FloatControlMask {
83+
ROUND_MASK = (RTE | RTP | RTN | RTZ),
84+
DENORM_MASK = (DENORM_D_ALLOW | DENORM_F_ALLOW | DENORM_HF_ALLOW)
85+
};
86+
87+
// SPIR-V execution modes used to express FP control. The numeric values are
// taken from the SPIR-V specification (SPV_KHR_float_controls and
// SPV_INTEL_float_controls2 extensions):
// https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.pdf

// Denormal handling.
constexpr uint32_t SPIRV_DENORM_PRESERVE = 4459;      // DenormPreserve
constexpr uint32_t SPIRV_DENORM_FLUSH_TO_ZERO = 4460; // DenormFlushToZero

// Rounding modes.
constexpr uint32_t SPIRV_ROUNDING_MODE_RTE = 4462;       // RoundingModeRTE
constexpr uint32_t SPIRV_ROUNDING_MODE_RTZ = 4463;       // RoundingModeRTZ
constexpr uint32_t SPIRV_ROUNDING_MODE_RTP_INTEL = 5620; // RoundingModeRTPINTEL
constexpr uint32_t SPIRV_ROUNDING_MODE_RTN_INTEL = 5621; // RoundingModeRTNINTEL
97+
6898
/// Builds a metadata node for a SPIR-V decoration (decoration code is
6999
/// \c uint32_t integers) with no value.
70100
///
@@ -282,6 +312,55 @@ attributeToExecModeMetadata(const Attribute &Attr, Function &F) {
282312
if (!AttrKindStr.startswith("sycl-"))
283313
return std::nullopt;
284314

315+
auto AddFPControlMetadataForWidth = [&](int32_t SPIRVFPControl,
316+
int32_t Width) {
317+
auto NamedMD = M.getOrInsertNamedMetadata("spirv.ExecutionMode");
318+
SmallVector<Metadata *, 4> ValueVec;
319+
ValueVec.push_back(ConstantAsMetadata::get(&F));
320+
ValueVec.push_back(ConstantAsMetadata::get(
321+
ConstantInt::get(Type::getInt32Ty(Ctx), SPIRVFPControl)));
322+
ValueVec.push_back(ConstantAsMetadata::get(
323+
ConstantInt::get(Type::getInt32Ty(Ctx), Width)));
324+
NamedMD->addOperand(MDNode::get(Ctx, ValueVec));
325+
};
326+
327+
auto AddFPControlMetadata = [&](int32_t SPIRVFPControl) {
328+
for (int32_t Width : {64, 32, 16}) {
329+
AddFPControlMetadataForWidth(SPIRVFPControl, Width);
330+
}
331+
};
332+
333+
if (AttrKindStr == "sycl-floating-point-control") {
334+
uint32_t FPControl = getAttributeAsInteger<uint32_t>(Attr);
335+
auto IsFPModeSet = [FPControl](FloatControl Flag) -> bool {
336+
return (FPControl & Flag) == Flag;
337+
};
338+
339+
if (IsFPModeSet(RTE))
340+
AddFPControlMetadata(SPIRV_ROUNDING_MODE_RTE);
341+
342+
if (IsFPModeSet(RTP))
343+
AddFPControlMetadata(SPIRV_ROUNDING_MODE_RTP_INTEL);
344+
345+
if (IsFPModeSet(RTN))
346+
AddFPControlMetadata(SPIRV_ROUNDING_MODE_RTN_INTEL);
347+
348+
if (IsFPModeSet(RTZ))
349+
AddFPControlMetadata(SPIRV_ROUNDING_MODE_RTZ);
350+
351+
if (IsFPModeSet(DENORM_FTZ))
352+
AddFPControlMetadata(SPIRV_DENORM_FLUSH_TO_ZERO);
353+
354+
if (IsFPModeSet(DENORM_HF_ALLOW))
355+
AddFPControlMetadataForWidth(SPIRV_DENORM_PRESERVE, 16);
356+
357+
if (IsFPModeSet(DENORM_F_ALLOW))
358+
AddFPControlMetadataForWidth(SPIRV_DENORM_PRESERVE, 32);
359+
360+
if (IsFPModeSet(DENORM_D_ALLOW))
361+
AddFPControlMetadataForWidth(SPIRV_DENORM_PRESERVE, 64);
362+
}
363+
285364
if (AttrKindStr == "sycl-work-group-size" ||
286365
AttrKindStr == "sycl-work-group-size-hint") {
287366
// Split values in the comma-separated list integers.
Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,156 @@
1+
; RUN: opt -passes=compile-time-properties %s -S | FileCheck %s
2+
3+
4+
define spir_kernel void @"Kernel0"() #0 {
5+
entry:
6+
ret void
7+
}
8+
9+
define spir_kernel void @"Kernel1"() #1 {
10+
entry:
11+
ret void
12+
}
13+
14+
define spir_kernel void @"Kernel2"() #2 {
15+
entry:
16+
ret void
17+
}
18+
19+
define spir_kernel void @"Kernel3"() #3 {
20+
entry:
21+
ret void
22+
}
23+
24+
define spir_kernel void @"Kernel4"() #4 {
25+
entry:
26+
ret void
27+
}
28+
29+
define spir_kernel void @"Kernel5"() #5 {
30+
entry:
31+
ret void
32+
}
33+
34+
define spir_kernel void @"Kernel6"() #6 {
35+
entry:
36+
ret void
37+
}
38+
39+
define spir_kernel void @"Kernel7"() #7 {
40+
entry:
41+
ret void
42+
}
43+
44+
define spir_kernel void @"Kernel8"() #8 {
45+
entry:
46+
ret void
47+
}
48+
49+
define spir_kernel void @"Kernel9"() #9 {
50+
entry:
51+
ret void
52+
}
53+
54+
define spir_kernel void @"Kernel10"() #10 {
55+
entry:
56+
ret void
57+
}
58+
59+
; SPIRV execution modes for FP control. | BitMask
60+
; ROUNDING_MODE_RTE = 4462; | 00000001
61+
; ROUNDING_MODE_RTP_INTEL = 5620; | 00000010
62+
; ROUNDING_MODE_RTN_INTEL = 5621; | 00000100
63+
; ROUNDING_MODE_RTZ = 4463; | 00001000
64+
; DENORM_FLUSH_TO_ZERO = 4460; | 00010000
65+
; DENORM_PRESERVE (double) = 4459; | 00100000
66+
; DENORM_PRESERVE (float) = 4459; | 01000000
67+
; DENORM_PRESERVE (half) = 4459; | 10000000
68+
69+
; rte + ftz (Default)
70+
; CHECK: !0 = !{ptr @Kernel0, i32 [[RTE:4462]], i32 64}
71+
; CHECK: !1 = !{ptr @Kernel0, i32 [[RTE]], i32 32}
72+
; CHECK: !2 = !{ptr @Kernel0, i32 [[RTE]], i32 16}
73+
; CHECK: !3 = !{ptr @Kernel0, i32 [[FTZ:4460]], i32 64}
74+
; CHECK: !4 = !{ptr @Kernel0, i32 [[FTZ]], i32 32}
75+
; CHECK: !5 = !{ptr @Kernel0, i32 [[FTZ]], i32 16}
76+
attributes #0 = { "sycl-floating-point-control"="17" }
77+
78+
; rtp + ftz
79+
; CHECK: !6 = !{ptr @Kernel1, i32 [[RTP:5620]], i32 64}
80+
; CHECK: !7 = !{ptr @Kernel1, i32 [[RTP]], i32 32}
81+
; CHECK: !8 = !{ptr @Kernel1, i32 [[RTP]], i32 16}
82+
; CHECK: !9 = !{ptr @Kernel1, i32 [[FTZ]], i32 64}
83+
; CHECK: !10 = !{ptr @Kernel1, i32 [[FTZ]], i32 32}
84+
; CHECK: !11 = !{ptr @Kernel1, i32 [[FTZ]], i32 16}
85+
attributes #1 = { "sycl-floating-point-control"="18" }
86+
87+
; rtn + ftz
88+
; CHECK: !12 = !{ptr @Kernel2, i32 [[RTN:5621]], i32 64}
89+
; CHECK: !13 = !{ptr @Kernel2, i32 [[RTN]], i32 32}
90+
; CHECK: !14 = !{ptr @Kernel2, i32 [[RTN]], i32 16}
91+
; CHECK: !15 = !{ptr @Kernel2, i32 [[FTZ]], i32 64}
92+
; CHECK: !16 = !{ptr @Kernel2, i32 [[FTZ]], i32 32}
93+
; CHECK: !17 = !{ptr @Kernel2, i32 [[FTZ]], i32 16}
94+
attributes #2 = { "sycl-floating-point-control"="20" }
95+
96+
; rtz + ftz
97+
; CHECK: !18 = !{ptr @Kernel3, i32 [[RTZ:4463]], i32 64}
98+
; CHECK: !19 = !{ptr @Kernel3, i32 [[RTZ]], i32 32}
99+
; CHECK: !20 = !{ptr @Kernel3, i32 [[RTZ]], i32 16}
100+
; CHECK: !21 = !{ptr @Kernel3, i32 [[FTZ]], i32 64}
101+
; CHECK: !22 = !{ptr @Kernel3, i32 [[FTZ]], i32 32}
102+
; CHECK: !23 = !{ptr @Kernel3, i32 [[FTZ]], i32 16}
103+
attributes #3 = { "sycl-floating-point-control"="24" }
104+
105+
; rte + denorm_preserve(double)
106+
; CHECK: !24 = !{ptr @Kernel4, i32 [[RTE]], i32 64}
107+
; CHECK: !25 = !{ptr @Kernel4, i32 [[RTE]], i32 32}
108+
; CHECK: !26 = !{ptr @Kernel4, i32 [[RTE]], i32 16}
109+
; CHECK: !27 = !{ptr @Kernel4, i32 [[DENORM_PRESERVE:4459]], i32 64}
110+
attributes #4 = { "sycl-floating-point-control"="33" }
111+
112+
; rte + denorm_preserve(float)
113+
; CHECK: !28 = !{ptr @Kernel5, i32 [[RTE]], i32 64}
114+
; CHECK: !29 = !{ptr @Kernel5, i32 [[RTE]], i32 32}
115+
; CHECK: !30 = !{ptr @Kernel5, i32 [[RTE]], i32 16}
116+
; CHECK: !31 = !{ptr @Kernel5, i32 [[DENORM_PRESERVE]], i32 32}
117+
attributes #5 = { "sycl-floating-point-control"="65" }
118+
119+
; rte + denorm_preserve(half)
120+
; CHECK: !32 = !{ptr @Kernel6, i32 [[RTE]], i32 64}
121+
; CHECK: !33 = !{ptr @Kernel6, i32 [[RTE]], i32 32}
122+
; CHECK: !34 = !{ptr @Kernel6, i32 [[RTE]], i32 16}
123+
; CHECK: !35 = !{ptr @Kernel6, i32 [[DENORM_PRESERVE]], i32 16}
124+
attributes #6 = { "sycl-floating-point-control"="129" }
125+
126+
; rte + denorm_allow
127+
; CHECK: !36 = !{ptr @Kernel7, i32 [[RTE]], i32 64}
128+
; CHECK: !37 = !{ptr @Kernel7, i32 [[RTE]], i32 32}
129+
; CHECK: !38 = !{ptr @Kernel7, i32 [[RTE]], i32 16}
130+
; CHECK: !39 = !{ptr @Kernel7, i32 [[DENORM_PRESERVE]], i32 16}
131+
; CHECK: !40 = !{ptr @Kernel7, i32 [[DENORM_PRESERVE]], i32 32}
132+
; CHECK: !41 = !{ptr @Kernel7, i32 [[DENORM_PRESERVE]], i32 64}
133+
attributes #7 = { "sycl-floating-point-control"="225" }
134+
135+
; rtz + denorm_preserve(double)
136+
; CHECK: !42 = !{ptr @Kernel8, i32 [[RTZ]], i32 64}
137+
; CHECK: !43 = !{ptr @Kernel8, i32 [[RTZ]], i32 32}
138+
; CHECK: !44 = !{ptr @Kernel8, i32 [[RTZ]], i32 16}
139+
; CHECK: !45 = !{ptr @Kernel8, i32 [[DENORM_PRESERVE]], i32 64}
140+
attributes #8 = { "sycl-floating-point-control"="40" }
141+
142+
; rtp + denorm_preserve(float)
143+
; CHECK: !46 = !{ptr @Kernel9, i32 [[RTP]], i32 64}
144+
; CHECK: !47 = !{ptr @Kernel9, i32 [[RTP]], i32 32}
145+
; CHECK: !48 = !{ptr @Kernel9, i32 [[RTP]], i32 16}
146+
; CHECK: !49 = !{ptr @Kernel9, i32 [[DENORM_PRESERVE]], i32 32}
147+
attributes #9 = { "sycl-floating-point-control"="66" }
148+
149+
; rtz + denorm_allow
150+
; CHECK: !50 = !{ptr @Kernel10, i32 [[RTZ]], i32 64}
151+
; CHECK: !51 = !{ptr @Kernel10, i32 [[RTZ]], i32 32}
152+
; CHECK: !52 = !{ptr @Kernel10, i32 [[RTZ]], i32 16}
153+
; CHECK: !53 = !{ptr @Kernel10, i32 [[DENORM_PRESERVE]], i32 16}
154+
; CHECK: !54 = !{ptr @Kernel10, i32 [[DENORM_PRESERVE]], i32 32}
155+
; CHECK: !55 = !{ptr @Kernel10, i32 [[DENORM_PRESERVE]], i32 64}
156+
attributes #10 = { "sycl-floating-point-control"="232" }

llvm/tools/sycl-post-link/ModuleSplitter.cpp

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -281,6 +281,52 @@ void collectFunctionsAndGlobalVariablesToExtract(
281281
}
282282
}
283283

284+
// Check "spirv.ExecutionMode" named metadata in the module and remove nodes
285+
// that reference kernels that have dead prototypes or don't reference any
286+
// kernel at all (nullptr). Dead prototypes are removed as well.
287+
void processSubModuleNamedMetadata(Module *M) {
288+
auto ExecutionModeMD = M->getNamedMetadata("spirv.ExecutionMode");
289+
if (!ExecutionModeMD)
290+
return;
291+
292+
bool ContainsNodesToRemove = false;
293+
std::vector<MDNode *> ValueVec;
294+
for (auto Op : ExecutionModeMD->operands()) {
295+
assert(Op->getNumOperands() > 0);
296+
if (!Op->getOperand(0)) {
297+
ContainsNodesToRemove = true;
298+
continue;
299+
}
300+
301+
// If the first operand is not nullptr then it has to be a kernel
302+
// function.
303+
Value *Val = cast<ValueAsMetadata>(Op->getOperand(0))->getValue();
304+
Function *F = cast<Function>(Val);
305+
// If kernel function is just a prototype and unused then we can remove it
306+
// and later remove corresponding spirv.ExecutionMode metadata node.
307+
if (F->isDeclaration() && F->use_empty()) {
308+
F->eraseFromParent();
309+
ContainsNodesToRemove = true;
310+
continue;
311+
}
312+
313+
// Rememver nodes which we need to keep in the module.
314+
ValueVec.push_back(Op);
315+
}
316+
if (!ContainsNodesToRemove)
317+
return;
318+
319+
if (ValueVec.empty()) {
320+
// If all nodes need to be removed then just remove named metadata
321+
// completely.
322+
ExecutionModeMD->eraseFromParent();
323+
} else {
324+
ExecutionModeMD->clearOperands();
325+
for (auto MD : ValueVec)
326+
ExecutionModeMD->addOperand(MD);
327+
}
328+
}
329+
284330
ModuleDesc extractSubModule(const ModuleDesc &MD,
285331
const SetVector<const GlobalValue *> GVs,
286332
EntryPointGroup &&ModuleEntryPoints) {
@@ -577,6 +623,15 @@ void ModuleDesc::cleanup() {
577623
MPM.addPass(StripDeadDebugInfoPass()); // Remove dead debug info.
578624
MPM.addPass(StripDeadPrototypesPass()); // Remove dead func decls.
579625
MPM.run(*M, MAM);
626+
627+
// Original module may have named metadata (spirv.ExecutionMode) referencing
628+
// kernels in the module. Some of the Metadata nodes may reference kernels
629+
// which are not included into the extracted submodule, in such case
630+
// CloneModule either leaves such metadata nodes as is but they will reference
631+
// dead prototype of the kernel or operand will be replaced with nullptr. So
632+
// process all nodes in the named metadata and remove nodes which are
633+
// referencing kernels which are not included into submodule.
634+
processSubModuleNamedMetadata(M.get());
580635
}
581636

582637
bool ModuleDesc::isSpecConstantDefault() const {

0 commit comments

Comments
 (0)