Skip to content

Commit b5b3a54

Browse files
authored
[SYCL][Fusion] Pass accessor element size in bytes to internalization pass (#12108)
This PR removes some remnants of the opaque pointer transition from the internalization pass. For private internalization, we insert an `alloca` instruction to model a work-item's private storage. Currently, we have to infer the accessor's element type by looking at its users. Now, we simply pass the element size as metadata to the pass, and insert an `i8`-`alloca` with the desired size in bytes. I'm using separate `LocalSize` (= number of buffer elements associated with each work-item resp. work-group) and `ElemSize` here for now as the remapping of GEP instructions still works in terms of number-of-elements. If in the future these are rewritten to perform `i8*` arithmetic as well, we could go back to only passing a single size (then in bytes instead of number of elements). --------- Signed-off-by: Julian Oppermann <[email protected]>
1 parent 39db079 commit b5b3a54

File tree

14 files changed

+181
-183
lines changed

14 files changed

+181
-183
lines changed

sycl-fusion/jit-compiler/include/Parameter.h

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -78,15 +78,17 @@ struct ParameterInternalization {
7878
Parameter Param;
7979
Internalization Intern;
8080
std::size_t LocalSize;
81+
std::size_t ElemSize;
8182
ParameterInternalization() = default;
8283
ParameterInternalization(const Parameter &Param, Internalization Intern,
83-
std::size_t LocalSize)
84-
: Param{Param}, Intern{Intern}, LocalSize{LocalSize} {}
84+
std::size_t LocalSize, std::size_t ElemSize)
85+
: Param{Param}, Intern{Intern}, LocalSize{LocalSize}, ElemSize(ElemSize) {
86+
}
8587

8688
friend bool operator==(const ParameterInternalization &LHS,
8789
const ParameterInternalization &RHS) noexcept {
88-
return LHS.LocalSize == RHS.LocalSize && LHS.Intern == RHS.Intern &&
89-
LHS.Param == RHS.Param;
90+
return LHS.LocalSize == RHS.LocalSize && LHS.ElemSize == RHS.ElemSize &&
91+
LHS.Intern == RHS.Intern && LHS.Param == RHS.Param;
9092
}
9193

9294
friend bool operator!=(const ParameterInternalization &LHS,

sycl-fusion/jit-compiler/lib/fusion/FusionHelper.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ Expected<std::unique_ptr<Module>> helper::FusionHelper::addFusedKernel(
4747
const char *ParameterMDKind = "sycl.kernel.param";
4848
const char *InternalizationMDKind = "sycl.kernel.promote";
4949
const char *InternalizationLSMDKind = "sycl.kernel.promote.localsize";
50+
const char *InternalizationESMDKind = "sycl.kernel.promote.elemsize";
5051
const char *ConstantsMDKind = "sycl.kernel.constants";
5152
// The function type of each kernel stub is identical ("void()"),
5253
// the fusion pass will insert the correct arguments based on
@@ -149,13 +150,16 @@ Expected<std::unique_ptr<Module>> helper::FusionHelper::addFusedKernel(
149150
if (!Internalization.empty()) {
150151
SmallVector<Metadata *> MDInternalizationKind;
151152
SmallVector<Metadata *> MDInternalizationLocalSize;
153+
SmallVector<Metadata *> MDInternalizationElemSize;
152154
const auto EmplaceBackIntern = [&](const auto &Info, auto Str) {
153155
std::array<Metadata *, 2> MDs;
154156
MDs[0] = getMDParam(LLVMCtx, Info.Param);
155157
MDs[1] = MDString::get(LLVMCtx, Str);
156158
MDInternalizationKind.emplace_back(MDNode::get(LLVMCtx, MDs));
157159
MDs[1] = getConstantIntMD<std::size_t>(LLVMCtx, Info.LocalSize);
158160
MDInternalizationLocalSize.emplace_back(MDNode::get(LLVMCtx, MDs));
161+
MDs[1] = getConstantIntMD<std::size_t>(LLVMCtx, Info.ElemSize);
162+
MDInternalizationElemSize.emplace_back(MDNode::get(LLVMCtx, MDs));
159163
};
160164
for (const auto &Info : Internalization) {
161165
constexpr StringLiteral LocalInternalizationStr{"local"};
@@ -177,10 +181,13 @@ Expected<std::unique_ptr<Module>> helper::FusionHelper::addFusedKernel(
177181
}
178182
assert(!F->hasMetadata(InternalizationMDKind));
179183
assert(!F->hasMetadata(InternalizationLSMDKind));
184+
assert(!F->hasMetadata(InternalizationESMDKind));
180185
F->setMetadata(InternalizationMDKind,
181186
MDNode::get(LLVMCtx, MDInternalizationKind));
182187
F->setMetadata(InternalizationLSMDKind,
183188
MDNode::get(LLVMCtx, MDInternalizationLocalSize));
189+
F->setMetadata(InternalizationESMDKind,
190+
MDNode::get(LLVMCtx, MDInternalizationElemSize));
184191
}
185192
}
186193

sycl-fusion/passes/internalization/Internalization.cpp

Lines changed: 128 additions & 146 deletions
Large diffs are not rendered by default.

sycl-fusion/passes/internalization/Internalization.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ class SYCLInternalizer : public PassInfoMixin<SYCLInternalizer> {
2020
public:
2121
constexpr static StringLiteral Key{"sycl.kernel.promote"};
2222
constexpr static StringLiteral LocalSizeKey{"sycl.kernel.promote.localsize"};
23+
constexpr static StringLiteral ElemSizeKey{"sycl.kernel.promote.elemsize"};
2324

2425
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
2526
};

sycl-fusion/passes/kernel-fusion/SYCLKernelFusion.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -490,6 +490,8 @@ Error SYCLKernelFusion::fuseKernel(
490490
ParamMapping, DefaultInternalizationVal);
491491
copyArgsMD(LLVMCtx, SYCLInternalizer::LocalSizeKey, StubFunction,
492492
*FusedFunction, ParamMapping);
493+
copyArgsMD(LLVMCtx, SYCLInternalizer::ElemSizeKey, StubFunction,
494+
*FusedFunction, ParamMapping);
493495
// and JIT constants
494496
copyArgsMD(LLVMCtx, SYCLCP::Key, StubFunction, *FusedFunction,
495497
ParamMapping);

sycl-fusion/passes/target/TargetFusionInfo.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ class TargetFusionInfoImpl {
7676

7777
virtual void
7878
updateAddressSpaceMetadata([[maybe_unused]] Function *KernelFunc,
79-
[[maybe_unused]] ArrayRef<size_t> LocalSize,
79+
[[maybe_unused]] ArrayRef<bool> ArgIsPromoted,
8080
[[maybe_unused]] unsigned AddressSpace) const {}
8181

8282
virtual std::optional<BuiltinKind> getBuiltinKind(Function *F) const = 0;
@@ -250,7 +250,7 @@ class SPIRVTargetFusionInfo : public TargetFusionInfoImpl {
250250
unsigned getLocalAddressSpace() const override { return 3; }
251251

252252
void updateAddressSpaceMetadata(Function *KernelFunc,
253-
ArrayRef<size_t> LocalSize,
253+
ArrayRef<bool> ArgIsPromoted,
254254
unsigned AddressSpace) const override {
255255
static constexpr unsigned AddressSpaceBitWidth{32};
256256
static constexpr StringLiteral KernelArgAddrSpaceMD{
@@ -265,8 +265,8 @@ class SPIRVTargetFusionInfo : public TargetFusionInfoImpl {
265265
// we should update it in the new one.
266266
SmallVector<Metadata *> NewInfo{AddrspaceMD->op_begin(),
267267
AddrspaceMD->op_end()};
268-
for (auto I : enumerate(LocalSize)) {
269-
if (I.value() == 0) {
268+
for (auto I : enumerate(ArgIsPromoted)) {
269+
if (!I.value()) {
270270
continue;
271271
}
272272
const auto Index = I.index();
@@ -1149,9 +1149,9 @@ unsigned TargetFusionInfo::getLocalAddressSpace() const {
11491149
}
11501150

11511151
void TargetFusionInfo::updateAddressSpaceMetadata(Function *KernelFunc,
1152-
ArrayRef<size_t> LocalSize,
1152+
ArrayRef<bool> ArgIsPromoted,
11531153
unsigned AddressSpace) const {
1154-
Impl->updateAddressSpaceMetadata(KernelFunc, LocalSize, AddressSpace);
1154+
Impl->updateAddressSpaceMetadata(KernelFunc, ArgIsPromoted, AddressSpace);
11551155
}
11561156

11571157
llvm::ArrayRef<llvm::StringRef>

sycl-fusion/passes/target/TargetFusionInfo.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ class TargetFusionInfo {
6666
unsigned getLocalAddressSpace() const;
6767

6868
void updateAddressSpaceMetadata(Function *KernelFunc,
69-
ArrayRef<size_t> LocalSize,
69+
ArrayRef<bool> ArgIsPromoted,
7070
unsigned AddressSpace) const;
7171

7272
///

sycl-fusion/test/internalization/promote-local-nested.ll

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ declare void @llvm.assume(i1 noundef) #1
2323
; Function Attrs: nounwind willreturn memory(none)
2424
declare spir_func i64 @_Z33__spirv_BuiltInGlobalInvocationIdi(i32) #2
2525

26-
define spir_kernel void @fused_0(ptr addrspace(1) align 8 %_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_E9KernelOne__arg_accIn1, ptr byval(%"class.sycl::_V1::id") align 8 %_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_E9KernelOne__arg_accIn13, ptr addrspace(1) align 8 %_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_E9KernelOne__arg_accIn2, ptr byval(%"class.sycl::_V1::id") align 8 %_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_E9KernelOne__arg_accIn26, ptr addrspace(1) align 8 %_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_E9KernelOne__arg_accTmp, ptr byval(%"class.sycl::_V1::id") align 8 %_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_E9KernelOne__arg_accTmp9, ptr addrspace(1) align 8 %_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE0_clES2_E9KernelTwo__arg_accIn3, ptr byval(%"class.sycl::_V1::id") align 8 %_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE0_clES2_E9KernelTwo__arg_accIn36, ptr addrspace(1) align 8 %_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE0_clES2_E9KernelTwo__arg_accOut, ptr byval(%"class.sycl::_V1::id") align 8 %_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE0_clES2_E9KernelTwo__arg_accOut9) !kernel_arg_addr_space !6 !kernel_arg_access_qual !7 !kernel_arg_type !8 !kernel_arg_type_qual !9 !kernel_arg_base_type !8 !kernel_arg_name !10 !sycl.kernel.promote !11 !sycl.kernel.promote.localsize !12 !sycl.kernel.constants !13 {
26+
define spir_kernel void @fused_0(ptr addrspace(1) align 8 %_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_E9KernelOne__arg_accIn1, ptr byval(%"class.sycl::_V1::id") align 8 %_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_E9KernelOne__arg_accIn13, ptr addrspace(1) align 8 %_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_E9KernelOne__arg_accIn2, ptr byval(%"class.sycl::_V1::id") align 8 %_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_E9KernelOne__arg_accIn26, ptr addrspace(1) align 8 %_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_E9KernelOne__arg_accTmp, ptr byval(%"class.sycl::_V1::id") align 8 %_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_E9KernelOne__arg_accTmp9, ptr addrspace(1) align 8 %_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE0_clES2_E9KernelTwo__arg_accIn3, ptr byval(%"class.sycl::_V1::id") align 8 %_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE0_clES2_E9KernelTwo__arg_accIn36, ptr addrspace(1) align 8 %_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE0_clES2_E9KernelTwo__arg_accOut, ptr byval(%"class.sycl::_V1::id") align 8 %_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE0_clES2_E9KernelTwo__arg_accOut9) !kernel_arg_addr_space !6 !kernel_arg_access_qual !7 !kernel_arg_type !8 !kernel_arg_type_qual !9 !kernel_arg_base_type !8 !kernel_arg_name !10 !sycl.kernel.promote !11 !sycl.kernel.promote.localsize !12 !sycl.kernel.promote.elemsize !13 !sycl.kernel.constants !14 {
2727
; Scenario: Test the successful local internalization of the pointer argument
2828
; `...KernelOne__arg_accTmp`. This means the pointer argument has been replaced
2929
; by a pointer to the local address space (address space 3), and offset-wrapping
@@ -177,4 +177,5 @@ attributes #4 = { nocallback nofree nosync nounwind willreturn memory(argmem: re
177177
!10 = !{!"_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_E9KernelOne__arg_accIn1", !"_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_E9KernelOne__arg_accIn13", !"_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_E9KernelOne__arg_accIn2", !"_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_E9KernelOne__arg_accIn26", !"_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_E9KernelOne__arg_accTmp", !"_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_E9KernelOne__arg_accTmp9", !"_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE0_clES2_E9KernelTwo__arg_accIn3", !"_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE0_clES2_E9KernelTwo__arg_accIn36", !"_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE0_clES2_E9KernelTwo__arg_accOut", !"_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE0_clES2_E9KernelTwo__arg_accOut9"}
178178
!11 = !{!"none", !"none", !"none", !"none", !"local", !"none", !"none", !"none", !"none", !"none"}
179179
!12 = !{!"", !"", !"", !"", i64 4, !"", !"", !"", !"", !""}
180-
!13 = !{!"", !"\00\00\00\00\00\00\00\00", !"", !"\00\00\00\00\00\00\00\00", !"", !"", !"", !"\00\00\00\00\00\00\00\00", !"", !"\00\00\00\00\00\00\00\00"}
180+
!13 = !{!"", !"", !"", !"", i64 32, !"", !"", !"", !"", !""}
181+
!14 = !{!"", !"\00\00\00\00\00\00\00\00", !"", !"\00\00\00\00\00\00\00\00", !"", !"", !"", !"\00\00\00\00\00\00\00\00", !"", !"\00\00\00\00\00\00\00\00"}

sycl-fusion/test/internalization/promote-local-scalar.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ declare spir_func void @__itt_offload_wi_finish_stub(ptr addrspace(4) %group_id,
3535
declare spir_func void @__itt_offload_wi_start_stub(ptr addrspace(4) %group_id, i64 %wi_id, i32 %wg_size) #4
3636

3737

38-
define spir_kernel void @fused_0(ptr addrspace(1) align 4 %KernelOne_accTmp, ptr byval(%0) align 8 %KernelOne_accTmp3, ptr addrspace(1) align 4 %KernelOne_accIn1, ptr byval(%0) align 8 %KernelOne_accIn16, ptr addrspace(1) align 4 %KernelOne_accIn2, ptr addrspace(1) align 4 %KernelTwo_accOut, ptr addrspace(1) align 4 %KernelTwo_accIn3) !kernel_arg_addr_space !12 !kernel_arg_access_qual !13 !kernel_arg_type !14 !kernel_arg_type_qual !15 !kernel_arg_base_type !14 !kernel_arg_name !16 !sycl.kernel.promote !17 !sycl.kernel.promote.localsize !18 {
38+
define spir_kernel void @fused_0(ptr addrspace(1) align 4 %KernelOne_accTmp, ptr byval(%0) align 8 %KernelOne_accTmp3, ptr addrspace(1) align 4 %KernelOne_accIn1, ptr byval(%0) align 8 %KernelOne_accIn16, ptr addrspace(1) align 4 %KernelOne_accIn2, ptr addrspace(1) align 4 %KernelTwo_accOut, ptr addrspace(1) align 4 %KernelTwo_accIn3) !kernel_arg_addr_space !12 !kernel_arg_access_qual !13 !kernel_arg_type !14 !kernel_arg_type_qual !15 !kernel_arg_base_type !14 !kernel_arg_name !16 !sycl.kernel.promote !17 !sycl.kernel.promote.localsize !18 !sycl.kernel.promote.elemsize !19 {
3939
; Scenario: Test the successful local internalization of the first pointer
4040
; argument. This means, the first pointer argument has been replaced by a
4141
; pointer to local address space (address space 3).
@@ -138,3 +138,4 @@ attributes #5 = { nounwind }
138138
!16 = !{!"KernelOne_accTmp", !"KernelOne_accTmp3", !"KernelOne_accIn1", !"KernelOne_accIn16", !"KernelOne_accIn2", !"KernelTwo_accOut", !"KernelTwo_accIn3"}
139139
!17 = !{!"local", !"none", !"none", !"none", !"none", !"none", !"none"}
140140
!18 = !{i64 16, !"", !"", !"", !"", !"", !""}
141+
!19 = !{i64 4, !"", !"", !"", !"", !"", !""}

sycl-fusion/test/internalization/promote-local-vec.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ declare spir_func void @__itt_offload_wi_finish_stub(ptr addrspace(4) %group_id
3535
; Function Attrs: noinline nounwind
3636
declare spir_func void @__itt_offload_wi_start_stub(ptr addrspace(4) %group_id, i64 %wi_id, i32 %wg_size) #4
3737

38-
define spir_kernel void @fused_0(ptr addrspace(1) align 16 %KernelOne_accTmp, ptr byval(%1) align 8 %KernelOne_accTmp3, ptr addrspace(1) align 16 %KernelOne_accIn1, ptr byval(%1) align 8 %KernelOne_accIn16, ptr addrspace(1) align 16 %KernelOne_accIn2, ptr addrspace(1) align 16 %KernelTwo_accOut, ptr addrspace(1) align 16 %KernelTwo_accIn3) !kernel_arg_addr_space !12 !kernel_arg_access_qual !13 !kernel_arg_type !14 !kernel_arg_type_qual !15 !kernel_arg_base_type !14 !kernel_arg_name !16 !sycl.kernel.promote !17 !sycl.kernel.promote.localsize !18 {
38+
define spir_kernel void @fused_0(ptr addrspace(1) align 16 %KernelOne_accTmp, ptr byval(%1) align 8 %KernelOne_accTmp3, ptr addrspace(1) align 16 %KernelOne_accIn1, ptr byval(%1) align 8 %KernelOne_accIn16, ptr addrspace(1) align 16 %KernelOne_accIn2, ptr addrspace(1) align 16 %KernelTwo_accOut, ptr addrspace(1) align 16 %KernelTwo_accIn3) !kernel_arg_addr_space !12 !kernel_arg_access_qual !13 !kernel_arg_type !14 !kernel_arg_type_qual !15 !kernel_arg_base_type !14 !kernel_arg_name !16 !sycl.kernel.promote !17 !sycl.kernel.promote.localsize !18 !sycl.kernel.promote.elemsize !19 {
3939
; Scenario: Test the successful private internalization of the first pointer
4040
; argument. This means, the first pointer argument has been replaced by a
4141
; function-local alloca and all accesses have been updated to use this alloca
@@ -147,3 +147,4 @@ attributes #5 = { nounwind }
147147
!16 = !{!"KernelOne_accTmp", !"KernelOne_accTmp3", !"KernelOne_accIn1", !"KernelOne_accIn16", !"KernelOne_accIn2", !"KernelTwo_accOut", !"KernelTwo_accIn3"}
148148
!17 = !{!"local", !"none", !"none", !"none", !"none", !"none", !"none"}
149149
!18 = !{i64 16, !"", !"", !"", !"", !"", !""}
150+
!19 = !{i64 16, !"", !"", !"", !"", !"", !""}

0 commit comments

Comments
 (0)