Skip to content

Commit 2e1f97b

Browse files
committed
[ESIMD] Add set_kernel_properties API and use_double_grf property.
This patch: 1) Adds the esimd::set_kernel_properties API with the single supported property esimd::kernel_properties::use_double_grf, which lets the compiler know that the calling kernel needs to run in "double GRF" mode - more registers per thread at the expense of fewer H/W threads. This is a temporary API until generic SYCL support for kernel properties is implemented: https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/proposed/sycl_ext_oneapi_kernel_properties.asciidoc 2) Provides "lowering" of this API by LowerESIMD.cpp, which marks such kernels with the "esimd-double-grf" function attribute. 3) Implements a new "dimension" of device code splitting in sycl-post-link: functions with and without the "esimd-double-grf" attribute go to different modules. Device binary images resulting from "double-grf" modules are assigned the "isDoubleGRFEsimdImage" property. 4) Updates the runtime to add the "-doubleGRF" option when JITting SPIRV binaries with the "isDoubleGRFEsimdImage" property. 5) Fixes a sycl-post-link bug in ModuleSplitter.cpp:extractSubModule, where Function objects in the entry point list were not replaced with new Function objects in the cloned Module. This led to a corrupted symbol file in some cases. Signed-off-by: Konstantin S Bobrovsky <[email protected]>
1 parent 3e1c1bf commit 2e1f97b

File tree

9 files changed

+435
-103
lines changed

9 files changed

+435
-103
lines changed

llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp

Lines changed: 115 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,8 @@ class SYCLLowerESIMDLegacyPass : public ModulePass {
6969
};
7070
} // namespace
7171

72+
constexpr char ATTR_DOUBLE_GRF[] = "esimd-double-grf";
73+
7274
char SYCLLowerESIMDLegacyPass::ID = 0;
7375
INITIALIZE_PASS(SYCLLowerESIMDLegacyPass, "LowerESIMD",
7476
"Lower constructs specific to Close To Metal", false, false)
@@ -899,52 +901,30 @@ static inline llvm::Metadata *getMD(llvm::Value *V) {
899901
return llvm::ValueAsMetadata::get(V);
900902
}
901903

902-
/// Updates genx.kernels metadata attribute \p MD for the given function \p F.
903-
/// The value of the attribute is updated only if the new value \p NewVal is
904-
/// bigger than what is already stored in the attribute.
905-
// TODO: 1) In general this function is supposed to handle intrinsics
906-
// translated into kernel's metadata. So, the primary/intended usage model is
907-
// when such intrinsics are called from kernels.
908-
// 2) For now such intrinsics are also handled in functions directly called
909-
// from kernels and being translate into those caller-kernel meeven though such
910-
// behaviour is not fully specified/documented.
911-
// 3) This code (or the code in FE) must verify that slm_init or other such
912-
// intrinsic is not called from another module because kernels in that other
913-
// module would not get updated meta data attributes.
914-
static void updateGenXMDNodes(llvm::Function *F, genx::KernelMDOp MD,
915-
uint64_t NewVal) {
916-
llvm::NamedMDNode *GenXKernelMD =
917-
F->getParent()->getNamedMetadata(GENX_KERNEL_METADATA);
918-
assert(GenXKernelMD && "invalid genx.kernels metadata");
904+
static bool isESIMDKernel(const Function &F) {
905+
return (F.getCallingConv() == CallingConv::SPIR_KERNEL) &&
906+
(F.getMetadata("sycl_explicit_simd") != nullptr);
907+
}
919908

909+
template <class CallGraphNodeF>
910+
static void traverseCallgraphUp(llvm::Function *F, CallGraphNodeF ApplyF) {
920911
SmallPtrSet<Function *, 32> FunctionsVisited;
921912
SmallVector<Function *, 32> Worklist{F};
913+
922914
while (!Worklist.empty()) {
923915
Function *CurF = Worklist.pop_back_val();
924916
FunctionsVisited.insert(CurF);
925-
926-
// Update the meta data attribute for the current function.
927-
for (auto Node : GenXKernelMD->operands()) {
928-
if (Node->getNumOperands() <= MD ||
929-
getVal(Node->getOperand(genx::KernelMDOp::FunctionRef)) != CurF)
930-
continue;
931-
932-
llvm::Value *Old = getVal(Node->getOperand(MD));
933-
uint64_t OldVal = cast<llvm::ConstantInt>(Old)->getZExtValue();
934-
if (OldVal < NewVal) {
935-
llvm::Value *New = llvm::ConstantInt::get(Old->getType(), NewVal);
936-
Node->replaceOperandWith(MD, getMD(New));
937-
}
938-
}
917+
// Apply the action function.
918+
ApplyF(CurF);
939919

940920
// Update all callers as well.
941921
for (auto It = CurF->use_begin(); It != CurF->use_end(); It++) {
942922
auto FCall = It->getUser();
943923
if (!isa<CallInst>(FCall))
944924
llvm::report_fatal_error(
945925
llvm::Twine(__FILE__ " ") +
946-
"Found an intrinsic violating assumption on usage from a kernel or "
947-
"a func directly called from a kernel");
926+
"Function use other than call detected while traversing call\n"
927+
"graph starting from kernel property mark-up intrinsic.");
948928

949929
auto FCaller = cast<CallInst>(FCall)->getFunction();
950930
if (!FunctionsVisited.count(FCaller))
@@ -953,6 +933,68 @@ static void updateGenXMDNodes(llvm::Function *F, genx::KernelMDOp MD,
953933
}
954934
}
955935

936+
// A functor which updates ESIMD kernel's uint64_t metadata in case it is less
937+
// than the given one. Used in callgraph traversal to update nbarriers or SLM
938+
// size metadata. Update is performed by the '()' operator and happens only
939+
// when given function matches one of the kernels - thus, only reachable kernels
940+
// are updated.
941+
// TODO: 1) In general this function is supposed to handle intrinsics
942+
// translated into kernel's metadata. So, the primary/intended usage model is
943+
// when such intrinsics are called from kernels.
944+
// 2) For now such intrinsics are also handled in functions directly called
945+
// from kernels and being translated into those caller-kernel metadata even though such
946+
// behaviour is not fully specified/documented.
947+
// 3) This code (or the code in FE) must verify that slm_init or other such
948+
// intrinsic is not called from another module because kernels in that other
949+
// module would not get updated meta data attributes.
950+
struct UpdateUint64MetaDataToMaxValue {
951+
Module &M;
952+
// The uint64_t metadata key to update.
953+
genx::KernelMDOp Key;
954+
// The new metadata value. Must be greater than the old for update to happen.
955+
uint64_t NewVal;
956+
// Pre-selected nodes from GENX_KERNEL_METADATA which can only potentially be
957+
// updated.
958+
SmallVector<MDNode *, 4> CandidatesToUpdate;
959+
960+
UpdateUint64MetaDataToMaxValue(Module &M, genx::KernelMDOp Key,
961+
uint64_t NewVal)
962+
: M(M), Key(Key), NewVal(NewVal) {
963+
// Pre-select nodes for update to do less work in the '()' operator.
964+
llvm::NamedMDNode *GenXKernelMD = M.getNamedMetadata(GENX_KERNEL_METADATA);
965+
assert(GenXKernelMD && "invalid genx.kernels metadata");
966+
for (auto Node : GenXKernelMD->operands()) {
967+
if (Node->getNumOperands() <= (unsigned)Key) {
968+
continue;
969+
}
970+
llvm::Value *Old = getVal(Node->getOperand(Key));
971+
uint64_t OldVal = cast<llvm::ConstantInt>(Old)->getZExtValue();
972+
973+
if (OldVal < NewVal) {
974+
CandidatesToUpdate.push_back(Node);
975+
}
976+
}
977+
}
978+
979+
void operator()(Function *F) {
980+
// Update the meta data attribute for the current function.
981+
for (auto Node : CandidatesToUpdate) {
982+
assert(Node->getNumOperands() > (unsigned)Key);
983+
984+
if (getVal(Node->getOperand(genx::KernelMDOp::FunctionRef)) != F) {
985+
continue;
986+
}
987+
llvm::Value *Old = getVal(Node->getOperand(Key));
988+
#ifndef _NDEBUG
989+
uint64_t OldVal = cast<llvm::ConstantInt>(Old)->getZExtValue();
990+
assert(OldVal < NewVal);
991+
#endif // _NDEBUG
992+
llvm::Value *New = llvm::ConstantInt::get(Old->getType(), NewVal);
993+
Node->replaceOperandWith(Key, getMD(New));
994+
}
995+
}
996+
};
997+
956998
// This function sets/updates VCSLMSize attribute to the kernels
957999
// calling this intrinsic initializing SLM memory.
9581000
static void translateSLMInit(CallInst &CI) {
@@ -964,7 +1006,9 @@ static void translateSLMInit(CallInst &CI) {
9641006

9651007
uint64_t NewVal = cast<llvm::ConstantInt>(ArgV)->getZExtValue();
9661008
assert(NewVal != 0 && "zero slm bytes being requested");
967-
updateGenXMDNodes(F, genx::KernelMDOp::SLMSize, NewVal);
1009+
UpdateUint64MetaDataToMaxValue SetMaxSLMSize{
1010+
*F->getParent(), genx::KernelMDOp::SLMSize, NewVal};
1011+
traverseCallgraphUp(F, SetMaxSLMSize);
9681012
}
9691013

9701014
// This function sets/updates VCNamedBarrierCount attribute to the kernels
@@ -979,7 +1023,9 @@ static void translateNbarrierInit(CallInst &CI) {
9791023

9801024
auto NewVal = cast<llvm::ConstantInt>(ArgV)->getZExtValue();
9811025
assert(NewVal != 0 && "zero named barrier count being requested");
982-
updateGenXMDNodes(F, genx::KernelMDOp::NBarrierCnt, NewVal);
1026+
UpdateUint64MetaDataToMaxValue SetMaxNBarrierCnt{
1027+
*F->getParent(), genx::KernelMDOp::NBarrierCnt, NewVal};
1028+
traverseCallgraphUp(F, SetMaxNBarrierCnt);
9831029
}
9841030

9851031
static void translatePackMask(CallInst &CI) {
@@ -1099,6 +1145,33 @@ static void translateGetSurfaceIndex(CallInst &CI) {
10991145
CI.replaceAllUsesWith(SI);
11001146
}
11011147

1148+
// Kernel property identifiers. Should match ones in
1149+
// sycl/include/sycl/ext/intel/experimental/esimd/kernel_properties.hpp
1150+
// Kept as a plain (unscoped) enum: its enumerators are used as case labels
// against the integer property id read from the intrinsic's constant argument.
enum property_ids { use_double_grf = 0 };
1151+
1152+
// Lowers a call to the set_kernel_properties intrinsic. The first call argument
// must be an integral constant holding a property id (see property_ids).
// For use_double_grf, walks the call graph up from the function containing
// \p CI and adds the ATTR_DOUBLE_GRF function attribute to every ESIMD kernel
// met on the way. The call instruction itself is not erased here.
// Reports a fatal error if the argument is not a constant or if the property
// id is unknown.
static void translateSetKernelProperties(CallInst &CI) {
  Function *F = CI.getFunction();
  llvm::Value *ArgV = CI.getArgOperand(0);
  if (!isa<ConstantInt>(ArgV))
    llvm::report_fatal_error(
        llvm::Twine(__FILE__ " ") +
        "integral constant is expected for set_kernel_properties");
  uint64_t PropID = cast<llvm::ConstantInt>(ArgV)->getZExtValue();

  switch (PropID) {
  case property_ids::use_double_grf:
    traverseCallgraphUp(F, [](Function *GraphNode) {
      // Only kernels carry the attribute; intermediate functions are skipped.
      if (!isESIMDKernel(*GraphNode)) {
        return;
      }
      GraphNode->addFnAttr(ATTR_DOUBLE_GRF);
    });
    break;
  default:
    // An assert would compile away in release builds and the unknown property
    // would be dropped silently; fail loudly in all build modes instead.
    llvm::report_fatal_error(
        llvm::Twine(__FILE__ " ") +
        "invalid property id is passed to set_kernel_properties");
  }
}
1174+
11021175
// Newly created GenX intrinsic might have different return type than expected.
11031176
// This helper function creates cast operation from GenX intrinsic return type
11041177
// to currently expected. Returns pointer to created cast instruction if it
@@ -1514,8 +1587,7 @@ void generateKernelMetadata(Module &M) {
15141587

15151588
for (auto &F : M.functions()) {
15161589
// Skip non-SIMD kernels.
1517-
if (F.getCallingConv() != CallingConv::SPIR_KERNEL ||
1518-
F.getMetadata("sycl_explicit_simd") == nullptr)
1590+
if (!isESIMDKernel(F))
15191591
continue;
15201592

15211593
// Metadata node containing N i32s, where N is the number of kernel
@@ -1708,15 +1780,14 @@ size_t SYCLLowerESIMDPass::runOnFunction(Function &F,
17081780

17091781
// process ESIMD builtins that go through special handling instead of
17101782
// the translation procedure
1711-
// TODO FIXME slm_init should be made top-level __esimd_slm_init
1783+
17121784
if (Name.startswith("__esimd_slm_init") &&
17131785
isa<ConstantInt>(CI->getArgOperand(0))) {
17141786
// tag the kernel with meta-data SLMSize, and remove this builtin
17151787
translateSLMInit(*CI);
17161788
ToErase.push_back(CI);
17171789
continue;
17181790
}
1719-
17201791
if (Name.startswith("__esimd_nbarrier_init")) {
17211792
translateNbarrierInit(*CI);
17221793
ToErase.push_back(CI);
@@ -1748,12 +1819,16 @@ size_t SYCLLowerESIMDPass::runOnFunction(Function &F,
17481819
continue;
17491820
}
17501821
}
1751-
17521822
if (Name.startswith("__esimd_get_surface_index")) {
17531823
translateGetSurfaceIndex(*CI);
17541824
ToErase.push_back(CI);
17551825
continue;
17561826
}
1827+
if (Name.startswith("__esimd_set_kernel_properties")) {
1828+
translateSetKernelProperties(*CI);
1829+
ToErase.push_back(CI);
1830+
continue;
1831+
}
17571832

17581833
if (Name.empty() || !Name.startswith(ESIMD_INTRIN_PREF1))
17591834
continue;
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
; This test checks handling of the
2+
; set_kernel_properties(kernel_properties::use_double_grf);
3+
; by the post-link-tool:
4+
; - ESIMD/SYCL splitting happens as usual
5+
; - ESIMD module is further split into callgraphs for entry points requesting
6+
; "double GRF" and callgraphs for entry points which are not
7+
; - Compiler adds 'isDoubleGRFEsimdImage' property to the ESIMD device binary
8+
; images requesting "double GRF"
9+
10+
; RUN: sycl-post-link -split=source -symbols -split-esimd -lower-esimd -S %s -o %t.table
11+
; RUN: FileCheck %s -input-file=%t.table
12+
; RUN: FileCheck %s -input-file=%t_esimd_x2grf_0.ll --check-prefixes CHECK-ESIMD-2xGRF-IR
13+
; RUN: FileCheck %s -input-file=%t_esimd_x2grf_0.prop --check-prefixes CHECK-ESIMD-2xGRF-PROP
14+
; RUN: FileCheck %s -input-file=%t_0.sym --check-prefixes CHECK-SYCL-SYM
15+
; RUN: FileCheck %s -input-file=%t_esimd_0.sym --check-prefixes CHECK-ESIMD-SYM
16+
; RUN: FileCheck %s -input-file=%t_esimd_x2grf_0.sym --check-prefixes CHECK-ESIMD-2xGRF-SYM
17+
18+
; CHECK: [Code|Properties|Symbols]
19+
; CHECK: {{.*}}esimd_0.ll|{{.*}}esimd_0.prop|{{.*}}esimd_0.sym
20+
; CHECK: {{.*}}esimd_x2grf_0.ll|{{.*}}esimd_x2grf_0.prop|{{.*}}esimd_x2grf_0.sym
21+
; CHECK: {{.*}}_0.ll|{{.*}}_0.prop|{{.*}}_0.sym
22+
23+
; CHECK-ESIMD-2xGRF-PROP: isEsimdImage=1|1
24+
; CHECK-ESIMD-2xGRF-PROP: isDoubleGRFEsimdImage=1|1
25+
26+
; CHECK-SYCL-SYM: __SYCL_kernel
27+
; CHECK-SYCL-SYM-EMPTY:
28+
29+
; CHECK-ESIMD-SYM: __ESIMD_kernel
30+
; CHECK-ESIMD-SYM-EMPTY:
31+
32+
; CHECK-ESIMD-2xGRF-SYM: __ESIMD_double_grf_kernel
33+
; CHECK-ESIMD-2xGRF-SYM-EMPTY:
34+
35+
; ModuleID = 'double_grf.bc'
36+
source_filename = "llvm-link"
37+
target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
38+
target triple = "spir64-unknown-unknown"
39+
40+
define weak_odr dso_local spir_kernel void @__SYCL_kernel() #0 {
41+
entry:
42+
ret void
43+
}
44+
45+
define weak_odr dso_local spir_kernel void @__ESIMD_kernel() #0 !sycl_explicit_simd !0 !intel_reqd_sub_group_size !1 {
46+
entry:
47+
ret void
48+
}
49+
50+
define dso_local spir_func void @_Z17double_grf_markerv() {
51+
entry:
52+
call spir_func void @_Z29__esimd_set_kernel_propertiesi(i32 noundef 0)
53+
; -- Check that ESIMD lowering removed the marker call above:
54+
; CHECK-ESIMD-2xGRF-IR-NOT: {{.*}} @_Z29__esimd_set_kernel_propertiesi
55+
ret void
56+
}
57+
58+
declare dso_local spir_func void @_Z29__esimd_set_kernel_propertiesi(i32 noundef)
59+
60+
define weak_odr dso_local spir_kernel void @__ESIMD_double_grf_kernel() #0 !sycl_explicit_simd !0 !intel_reqd_sub_group_size !1 {
61+
entry:
62+
call spir_func void @_Z17double_grf_markerv()
63+
ret void
64+
}
65+
66+
attributes #0 = { "sycl-module-id"="a.cpp" }
67+
68+
!0 = !{}
69+
!1 = !{i32 1}

0 commit comments

Comments
 (0)