Skip to content

Commit ee98d31

Browse files
authored
[SYCL][ESIMD] Use vload/vstore for simd object getter/setter. (#1887)
Use special intrinsics to access simd objects in the private address space to disable standard LLVM optimizations on them. Author: Gang Chen <[email protected]>
1 parent 14af095 commit ee98d31

File tree

7 files changed

+185
-0
lines changed

7 files changed

+185
-0
lines changed

llvm/include/llvm/InitializePasses.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -415,6 +415,7 @@ void initializeStripSymbolsPass(PassRegistry&);
415415
void initializeStructurizeCFGPass(PassRegistry&);
416416
void initializeSYCLLowerWGScopeLegacyPassPass(PassRegistry &);
417417
void initializeSYCLLowerESIMDLegacyPassPass(PassRegistry &);
418+
void initializeESIMDLowerLoadStorePass(PassRegistry &);
418419
void initializeTailCallElimPass(PassRegistry&);
419420
void initializeTailDuplicatePass(PassRegistry&);
420421
void initializeTargetLibraryInfoWrapperPassPass(PassRegistry&);

llvm/include/llvm/LinkAllPasses.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,7 @@ namespace {
203203
(void) llvm::createExpandMemCmpPass();
204204
(void)llvm::createSYCLLowerWGScopePass();
205205
(void)llvm::createSYCLLowerESIMDPass();
206+
(void)llvm::createESIMDLowerLoadStorePass();
206207
std::string buf;
207208
llvm::raw_string_ostream os(buf);
208209
(void) llvm::createPrintModulePass(os);

llvm/include/llvm/SYCLLowerIR/LowerESIMD.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,14 @@ class SYCLLowerESIMDPass : public PassInfoMixin<SYCLLowerESIMDPass> {
3434
FunctionPass *createSYCLLowerESIMDPass();
3535
void initializeSYCLLowerESIMDLegacyPassPass(PassRegistry &);
3636

37+
/// Function pass that lowers ESIMD vload/vstore intrinsic calls into ordinary
/// load/store instructions unless they access a global variable carrying the
/// "genx_volatile" attribute (implemented in LowerESIMDVLoadVStore.cpp).
class ESIMDLowerLoadStorePass : public PassInfoMixin<ESIMDLowerLoadStorePass> {
38+
public:
39+
/// Runs the lowering on \p F. The result is PreservedAnalyses::all() only
/// when no instruction in \p F was rewritten.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &);
40+
};
41+
42+
/// Creates the legacy FunctionPass wrapper around ESIMDLowerLoadStorePass.
FunctionPass *createESIMDLowerLoadStorePass();
43+
/// Initialization hook for the legacy wrapper pass; it is invoked from the
/// wrapper's constructor and from opt's main().
void initializeESIMDLowerLoadStorePass(PassRegistry &);
44+
3745
} // namespace llvm
3846

3947
#endif // LLVM_SYCLLOWERIR_LOWERESIMD_H

llvm/lib/SYCLLowerIR/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ endif()
3131
add_llvm_component_library(LLVMSYCLLowerIR
3232
LowerWGScope.cpp
3333
LowerESIMD.cpp
34+
LowerESIMDVLoadVStore.cpp
3435

3536
ADDITIONAL_HEADER_DIRS
3637
${LLVM_MAIN_INCLUDE_DIR}/llvm/SYCLLowerIR
Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
//===- LowerESIMDVLoadVStore.cpp - lower vload/vstore to load/store -------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// convert vload/vstore to load/store if they are not for genx_volatile.
10+
//
11+
// File scope simd variables marked with genx_volatile attribute want
12+
// guaranteed allocation in register file, therefore we use vload/vstore
13+
// instead of load/store, so they won't be optimized away by llvm.
14+
//
15+
// For ordinary simd variables, we do not need to protect load/store. But
16+
// there is no good way to do this in clang. So we need this pass in the
17+
// end of module passes to separate the cases that we need vload/vstore vs.
18+
// the cases that we do not need vload/vstore
19+
//
20+
//===----------------------------------------------------------------------===//
21+
22+
#define DEBUG_TYPE "loweresimdvloadvstore"
23+
24+
#include "llvm/GenXIntrinsics/GenXIntrinsics.h"
25+
#include "llvm/IR/Function.h"
26+
#include "llvm/IR/IRBuilder.h"
27+
#include "llvm/IR/InstIterator.h"
28+
#include "llvm/IR/Instructions.h"
29+
#include "llvm/IR/IntrinsicInst.h"
30+
#include "llvm/IR/Module.h"
31+
#include "llvm/SYCLLowerIR/LowerESIMD.h"
32+
#include "llvm/Support/Debug.h"
33+
#include "llvm/Transforms/Scalar.h"
34+
35+
#include "llvm/Pass.h"
36+
37+
using namespace llvm;
38+
39+
namespace llvm {
40+
// Initialization hook called from the ESIMDLowerLoadStore constructor below.
// The same declaration is also provided by llvm/SYCLLowerIR/LowerESIMD.h,
// which this file includes.
void initializeESIMDLowerLoadStorePass(PassRegistry &);
41+
}
42+
43+
namespace {
44+
45+
class ESIMDLowerLoadStore : public FunctionPass {
46+
public:
47+
static char ID;
48+
ESIMDLowerLoadStore() : FunctionPass(ID) {
49+
initializeESIMDLowerLoadStorePass(*PassRegistry::getPassRegistry());
50+
}
51+
virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
52+
AU.setPreservesCFG();
53+
}
54+
55+
virtual bool runOnFunction(Function &F) override {
56+
FunctionAnalysisManager FAM;
57+
auto PA = Impl.run(F, FAM);
58+
return !PA.areAllPreserved();
59+
}
60+
61+
private:
62+
ESIMDLowerLoadStorePass Impl;
63+
};
64+
65+
} // namespace
66+
67+
// Storage for the pass identifier handed to the FunctionPass constructor.
char ESIMDLowerLoadStore::ID = 0;
68+
// Legacy pass registration; "ESIMDLowerLoadStore" is the name the accompanying
// lit test passes to opt (-ESIMDLowerLoadStore).
INITIALIZE_PASS(ESIMDLowerLoadStore, "ESIMDLowerLoadStore",
69+
"Lower ESIMD reference loads and stores", false, false)
70+
71+
// Lower non-volatilE vload/vstore intrinsic calls into normal load/store
72+
// instructions.
73+
PreservedAnalyses ESIMDLowerLoadStorePass::run(Function &F,
74+
FunctionAnalysisManager &FAM) {
75+
std::vector<Instruction *> ToErase;
76+
for (Instruction &Inst : instructions(F)) {
77+
if (!GenXIntrinsic::isVLoadStore(&Inst))
78+
continue;
79+
80+
auto *Ptr = Inst.getOperand(0);
81+
if (GenXIntrinsic::isVStore(&Inst))
82+
Ptr = Inst.getOperand(1);
83+
auto AS0 = cast<PointerType>(Ptr->getType())->getAddressSpace();
84+
Ptr = Ptr->stripPointerCasts();
85+
auto GV = dyn_cast<GlobalVariable>(Ptr);
86+
if (!GV || !GV->hasAttribute("genx_volatile")) {
87+
// change to load/store
88+
IRBuilder<> Builder(&Inst);
89+
if (GenXIntrinsic::isVStore(&Inst))
90+
Builder.CreateStore(Inst.getOperand(0), Inst.getOperand(1));
91+
else {
92+
auto LI = Builder.CreateLoad(Inst.getOperand(0), Inst.getName());
93+
LI->setDebugLoc(Inst.getDebugLoc());
94+
Inst.replaceAllUsesWith(LI);
95+
}
96+
ToErase.push_back(&Inst);
97+
} else {
98+
// change to vload/vstore that has the same address space as
99+
// the global-var in order to clean up unnecessary addr-cast.
100+
auto AS1 = GV->getType()->getAddressSpace();
101+
if (AS0 != AS1) {
102+
IRBuilder<> Builder(&Inst);
103+
if (GenXIntrinsic::isVStore(&Inst)) {
104+
auto PtrTy = cast<PointerType>(Inst.getOperand(1)->getType());
105+
PtrTy = PointerType::get(PtrTy->getElementType(), AS1);
106+
auto PtrCast = Builder.CreateAddrSpaceCast(Inst.getOperand(1), PtrTy);
107+
Type *Tys[] = {Inst.getOperand(0)->getType(), PtrCast->getType()};
108+
Value *Args[] = {Inst.getOperand(0), PtrCast};
109+
Function *Fn = GenXIntrinsic::getGenXDeclaration(
110+
F.getParent(), GenXIntrinsic::genx_vstore, Tys);
111+
Builder.CreateCall(Fn, Args, Inst.getName());
112+
} else {
113+
auto PtrTy = cast<PointerType>(Inst.getOperand(0)->getType());
114+
PtrTy = PointerType::get(PtrTy->getElementType(), AS1);
115+
auto PtrCast = Builder.CreateAddrSpaceCast(Inst.getOperand(0), PtrTy);
116+
Type *Tys[] = {Inst.getType(), PtrCast->getType()};
117+
Function *Fn = GenXIntrinsic::getGenXDeclaration(
118+
F.getParent(), GenXIntrinsic::genx_vload, Tys);
119+
Value *VLoad = Builder.CreateCall(Fn, PtrCast, Inst.getName());
120+
Inst.replaceAllUsesWith(VLoad);
121+
}
122+
ToErase.push_back(&Inst);
123+
}
124+
}
125+
}
126+
127+
for (auto Inst : ToErase) {
128+
Inst->eraseFromParent();
129+
}
130+
131+
return !ToErase.empty() ? PreservedAnalyses::none()
132+
: PreservedAnalyses::all();
133+
}
134+
135+
namespace llvm {
136+
FunctionPass *createESIMDLowerLoadStorePass() {
137+
return new ESIMDLowerLoadStore;
138+
}
139+
} // namespace llvm
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
; RUN: opt -ESIMDLowerLoadStore -S < %s | FileCheck %s
2+
3+
%"class._ZTSN2cm3gen4simdIiLi16EEE.cm::gen::simd" = type { <16 x i32> }
4+
5+
@vg = dso_local global %"class._ZTSN2cm3gen4simdIiLi16EEE.cm::gen::simd" zeroinitializer, align 64 #0
6+
@vc = dso_local addrspace(1) global <16 x i32> zeroinitializer, align 64
7+
8+
; Function Attrs: norecurse nounwind
9+
; Covers both lowering outcomes for vload and vstore:
;  * accesses to @vg (attribute group #0 carries "genx_volatile") must remain
;    vload/vstore intrinsic calls, re-emitted on the addrspace(0) pointer
;    (p0v16i32) instead of the addrspace(4) cast (p4v16i32);
;  * accesses to @vc (no "genx_volatile") must become plain load/store.
define dso_local spir_func void @_Z3foov() local_unnamed_addr #1 {
10+
; CHECK-LABEL: @_Z3foov(
11+
; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i32> @llvm.genx.vload.v16i32.p0v16i32(<16 x i32>* getelementptr inbounds (%"class._ZTSN2cm3gen4simdIiLi16EEE.cm::gen::simd", %"class._ZTSN2cm3gen4simdIiLi16EEE.cm::gen::simd"* @vg, i64 0, i32 0))
12+
; CHECK-NEXT: store <16 x i32> [[TMP1]], <16 x i32> addrspace(4)* addrspacecast (<16 x i32> addrspace(1)* @vc to <16 x i32> addrspace(4)*), align 64
13+
; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, <16 x i32> addrspace(4)* addrspacecast (<16 x i32> addrspace(1)* @vc to <16 x i32> addrspace(4)*), align 64
14+
; CHECK-NEXT: call void @llvm.genx.vstore.v16i32.p0v16i32(<16 x i32> [[TMP2]], <16 x i32>* getelementptr inbounds (%"class._ZTSN2cm3gen4simdIiLi16EEE.cm::gen::simd", %"class._ZTSN2cm3gen4simdIiLi16EEE.cm::gen::simd"* @vg, i64 0, i32 0))
15+
16+
%call.cm = call <16 x i32> @llvm.genx.vload.v16i32.p4v16i32(<16 x i32> addrspace(4)* addrspacecast (<16 x i32>* getelementptr inbounds (%"class._ZTSN2cm3gen4simdIiLi16EEE.cm::gen::simd", %"class._ZTSN2cm3gen4simdIiLi16EEE.cm::gen::simd"* @vg, i64 0, i32 0) to <16 x i32> addrspace(4)*))
17+
call void @llvm.genx.vstore.v16i32.p4v16i32(<16 x i32> %call.cm, <16 x i32> addrspace(4)* addrspacecast (<16 x i32> addrspace(1)* @vc to <16 x i32> addrspace(4)*))
18+
%call.cm2 = call <16 x i32> @llvm.genx.vload.v16i32.p4v16i32(<16 x i32> addrspace(4)* addrspacecast (<16 x i32> addrspace(1)* @vc to <16 x i32> addrspace(4)*))
19+
call void @llvm.genx.vstore.v16i32.p4v16i32(<16 x i32> %call.cm2, <16 x i32> addrspace(4)* addrspacecast (<16 x i32>* getelementptr inbounds (%"class._ZTSN2cm3gen4simdIiLi16EEE.cm::gen::simd", %"class._ZTSN2cm3gen4simdIiLi16EEE.cm::gen::simd"* @vg, i64 0, i32 0) to <16 x i32> addrspace(4)*))
20+
ret void
21+
}
22+
23+
; Function Attrs: nounwind
24+
declare <16 x i32> @llvm.genx.vload.v16i32.p4v16i32(<16 x i32> addrspace(4)*) #2
25+
26+
; Function Attrs: nounwind
27+
declare void @llvm.genx.vstore.v16i32.p4v16i32(<16 x i32>, <16 x i32> addrspace(4)*) #2
28+
29+
attributes #0 = { "genx_byte_offset"="192" "genx_volatile" }
30+
attributes #1 = { norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="512" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
31+
attributes #2 = { nounwind }
32+
33+
!0 = !{}
34+

llvm/tools/opt/opt.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -595,6 +595,7 @@ int main(int argc, char **argv) {
595595
initializeTypePromotionPass(Registry);
596596
initializeSYCLLowerWGScopeLegacyPassPass(Registry);
597597
initializeSYCLLowerESIMDLegacyPassPass(Registry);
598+
initializeESIMDLowerLoadStorePass(Registry);
598599

599600
#ifdef BUILD_EXAMPLES
600601
initializeExampleIRTransforms(Registry);

0 commit comments

Comments
 (0)