1
+ /* ========================== begin_copyright_notice ============================
2
+
3
+ Copyright (C) 2023 Intel Corporation
4
+
5
+ SPDX-License-Identifier: MIT
6
+
7
+ ============================= end_copyright_notice ===========================*/
8
+
9
+ #include " BlockMemOpAddrScalarizationPass.hpp"
10
+ #include " CodeGenPublicEnums.h"
11
+ #include " IGCIRBuilder.h"
12
+ #include < llvm/IR/Function.h>
13
+
14
+ #include " Compiler/IGCPassSupport.h"
15
+ #include " Compiler/CISACodeGen/helper.h"
16
+
17
+ #include " common/LLVMWarningsPush.hpp"
18
+ #include " common/LLVMWarningsPop.hpp"
19
+
20
+ using namespace llvm ;
21
+ using namespace IGC ;
22
+
23
// Unique pass identification token required by LLVM's pass infrastructure.
char BlockMemOpAddrScalarizationPass::ID = 0;

#define PASS_FLAG "block-memop-addr-scalar"
#define PASS_DESCRIPTION "Scalarization of address calculations for block memory operations."
#define PASS_CFG_ONLY false
#define PASS_ANALYSIS false
// Register the pass with IGC's pass registry. WIAnalysis is declared as a
// dependency because runOnFunction queries it for value uniformity.
IGC_INITIALIZE_PASS_BEGIN(BlockMemOpAddrScalarizationPass, PASS_FLAG, PASS_DESCRIPTION, PASS_CFG_ONLY, PASS_ANALYSIS)
IGC_INITIALIZE_PASS_DEPENDENCY(WIAnalysis)
IGC_INITIALIZE_PASS_END(BlockMemOpAddrScalarizationPass, PASS_FLAG, PASS_DESCRIPTION, PASS_CFG_ONLY, PASS_ANALYSIS)
32
+
33
// Constructor: registers this pass with the global PassRegistry so it can be
// instantiated by the pass manager.
BlockMemOpAddrScalarizationPass::BlockMemOpAddrScalarizationPass() : FunctionPass(ID) {
    initializeBlockMemOpAddrScalarizationPassPass(*PassRegistry::getPassRegistry());
}
36
+
37
+ bool BlockMemOpAddrScalarizationPass::runOnFunction (Function &F) {
38
+ Changed = false ;
39
+ WI = &getAnalysis<WIAnalysis>();
40
+ visit (F);
41
+ InstCanBeScalarized.clear ();
42
+ ExistingBroadcasts.clear ();
43
+ return Changed;
44
+ }
45
+
46
+ void BlockMemOpAddrScalarizationPass::visitCallInst (CallInst& C) {
47
+ if (GenIntrinsicInst *I = dyn_cast<GenIntrinsicInst>(&C)) {
48
+ GenISAIntrinsic::ID id = I->getIntrinsicID ();
49
+ if (id == GenISAIntrinsic::GenISA_simdBlockRead || id == GenISAIntrinsic::GenISA_simdBlockWrite)
50
+ scalarizeAddrArithmForBlockRdWr (I);
51
+ }
52
+ }
53
+
54
// This function checks if InstForCheck can be scalarized.
// InstForCheck is an instruction on the address-computation chain of a block
// memory operation; Root is the user through which the traversal reached it.
// Returns true only when every transitive use of InstForCheck either stays
// inside safe address arithmetic (checkInst == CanBeScalar) or terminates as
// the address operand of a block read/write, so scalarizing it cannot affect
// unrelated computations.
bool BlockMemOpAddrScalarizationPass::canInstBeScalarized(Instruction *InstForCheck, Instruction *Root) {
    if (checkInst(InstForCheck) != InstType::CanBeScalar)
        return false;

    bool GotFinalInst = false;
    for (auto Op = InstForCheck->op_begin(), E = InstForCheck->op_end(); Op != E; Op++) {
        if (Instruction *IOp = dyn_cast<Instruction>(Op)) {
            GotFinalInst = true;
            // Don't process any vector instructions.
            if (IOp->getType()->isVectorTy())
                return false;
        }
    }

    // If InstForCheck does not have any instruction operands, scalarize its result which is used in Root instruction.
    if (!GotFinalInst)
        return false;

    // This check shows that InstForCheck is used only in the address calculation chain.
    if (InstForCheck->getNumUses() == 1)
        return true;

    // Iterative DFS over all transitive users of InstForCheck.
    // Each UseStack entry is {user, the value it uses, visited flag}.
    SmallVector<std::tuple<Instruction*, Instruction*, bool>, 32> UseStack;
    // Steps holds the chain of instructions from InstForCheck down to the
    // user currently being expanded; it doubles as a cycle guard for PHIs.
    SmallVector<Instruction*, 32> Steps;
    Steps.push_back(InstForCheck);
    for (auto U : InstForCheck->users()) {
        if (Instruction *I = dyn_cast<Instruction>(U)) {
            if (I != Root) {
                UseStack.push_back({I, InstForCheck, false});
            }
        }
    }

    while (UseStack.size()) {
        // Backtrack one level when the top-of-stack entry no longer hangs off
        // the deepest recorded step.
        // NOTE(review): only a single Steps entry is popped per loop
        // iteration; this relies on entries being revisited (via the visited
        // flag) until Steps re-aligns — confirm against deep use chains.
        if (Steps.back() != std::get<1>(UseStack.back()))
            Steps.pop_back();

        Instruction *CurrUse = std::get<0>(UseStack.back());
        Instruction *CurrRoot = std::get<1>(UseStack.back());

        // If we have already analyzed this instruction.
        if (std::get<2>(UseStack.back())) {
            UseStack.pop_back();
            continue;
        }

        // Mark use as visited.
        std::get<2>(UseStack.back()) = true;

        InstType Res = checkInst(CurrUse);
        if (Res == InstType::BlcokMemOp) {
            // A block memory op terminates the path, but only when the value
            // reaches it as the address operand (operand 0).
            Instruction *Op0 = dyn_cast<Instruction>(CurrUse->getOperand(0));
            if (Op0 == CurrRoot) {
                UseStack.pop_back();
                continue;
            }
        } else if (Res == InstType::PreventScalar) {
            // Any use outside safe address arithmetic forbids scalarization.
            return false;
        }

        if (CurrUse->getNumUses()) {
            Steps.push_back(CurrUse);
            for (auto U : CurrUse->users()) {
                if (Instruction *I = dyn_cast<Instruction>(U)) {
                    // This check helps to avoid hanging in the following example:
                    // entry:
                    //   ...
                    //   br label bb1
                    // bb1:
                    //   %phires = phi i32 [ %0, %entry ], [ %sum, %bb2 ]
                    //   %cmp = icmp ult i32 %phires, 20
                    //   br i1 %cmp, label %bb2, label %bb3
                    // bb2:
                    //   %sum = add i32 %%phires, 1
                    // bb3:
                    //   ...
                    if (std::find(Steps.begin(), Steps.end(), I) != Steps.end())
                        continue;

                    UseStack.push_back({I, CurrUse, false});
                }
            }
        } else {
            // Leaf user: nothing further to expand.
            UseStack.pop_back();
        }
    }

    return true;
}
144
+
145
+ InstType BlockMemOpAddrScalarizationPass::checkInst (Instruction *I) {
146
+ bool Check = false ;
147
+ // If this I instruction is BlockRead/BlockWrite then return true for current user.
148
+ if (GenIntrinsicInst *GenInst = dyn_cast<GenIntrinsicInst>(I)) {
149
+ GenISAIntrinsic::ID Id = GenInst->getIntrinsicID ();
150
+ if (Id == GenISAIntrinsic::GenISA_simdBlockRead || Id == GenISAIntrinsic::GenISA_simdBlockWrite)
151
+ return InstType::BlcokMemOp;
152
+ }
153
+
154
+ if (I->isBinaryOp ())
155
+ Check = true ;
156
+
157
+ if (I->isCast ())
158
+ Check = true ;
159
+
160
+ if (isa<GetElementPtrInst>(I))
161
+ Check = true ;
162
+
163
+ if (isa<PHINode>(I))
164
+ Check = true ;
165
+
166
+ // Skip intrinsics that don't actually represent code after lowering.
167
+ auto canSkipCall = [](Instruction *I) -> bool {
168
+ if (IntrinsicInst *Intr = dyn_cast<IntrinsicInst>(I)) {
169
+ switch (Intr->getIntrinsicID ()) {
170
+ default :
171
+ break ;
172
+ case Intrinsic::assume:
173
+ case Intrinsic::dbg_declare:
174
+ case Intrinsic::dbg_value:
175
+ case Intrinsic::dbg_label:
176
+ case Intrinsic::lifetime_start:
177
+ case Intrinsic::lifetime_end:
178
+ return true ;
179
+ }
180
+ }
181
+ return false ;
182
+ };
183
+
184
+ if (canSkipCall (I))
185
+ Check = true ;
186
+
187
+ if (!Check)
188
+ return InstType::PreventScalar;
189
+
190
+ return InstType::CanBeScalar;
191
+ }
192
+
193
+ Value *BlockMemOpAddrScalarizationPass::insertBroadcast (Instruction *InstForBroadcast) {
194
+ Value *ShuffleRes = nullptr ;
195
+ Instruction *PlaceForInsert = nullptr ;
196
+
197
+ if (isa<PHINode>(InstForBroadcast))
198
+ PlaceForInsert = InstForBroadcast->getParent ()->getFirstNonPHI ();
199
+ else
200
+ PlaceForInsert = InstForBroadcast->getNextNonDebugInstruction ();
201
+
202
+ IRBuilder<> Builder (PlaceForInsert);
203
+
204
+ if (ExistingBroadcasts.count (InstForBroadcast)) {
205
+ // If broadcast was created before.
206
+ ShuffleRes = ExistingBroadcasts[InstForBroadcast];
207
+ } else {
208
+ Type *CurType = InstForBroadcast->getType ();
209
+ Value *ValForShuffle = nullptr ;
210
+
211
+ if (CurType->getScalarSizeInBits () == 1 )
212
+ ValForShuffle = Builder.CreateZExtOrTrunc (InstForBroadcast, Builder.getInt8Ty ());
213
+ else if (CurType->isPointerTy ())
214
+ ValForShuffle = Builder.CreatePtrToInt (InstForBroadcast, Builder.getInt64Ty ());
215
+ else
216
+ ValForShuffle = cast<Value>(InstForBroadcast);
217
+
218
+ Value *Args[3 ] = {ValForShuffle, Builder.getInt32 (0 ), Builder.getInt32 (0 )};
219
+ Type *Types[3 ] = {ValForShuffle->getType (), Builder.getInt32Ty (), Builder.getInt32Ty ()};
220
+ Function *BroadcastFunc = GenISAIntrinsic::getDeclaration (InstForBroadcast->getModule (),
221
+ GenISAIntrinsic::GenISA_WaveShuffleIndex,
222
+ Types);
223
+ Value *BroadcastCall = Builder.CreateCall (BroadcastFunc, Args);
224
+
225
+ if (CurType->getScalarSizeInBits () == 1 )
226
+ ShuffleRes = Builder.CreateZExtOrTrunc (BroadcastCall, CurType);
227
+ else if (CurType->isPointerTy ())
228
+ ShuffleRes = Builder.CreateIntToPtr (BroadcastCall, CurType);
229
+ else
230
+ ShuffleRes = BroadcastCall;
231
+
232
+ ExistingBroadcasts.insert ({InstForBroadcast, ShuffleRes});
233
+ }
234
+
235
+ return ShuffleRes;
236
+ }
237
+
238
// Scalarizes the address computation feeding BlockInstr (a GenISA
// simdBlockRead/simdBlockWrite). Performs a breadth-first walk of the
// address operand's def chain: instructions whose entire use web stays
// inside block-memory address math are recorded in InstCanBeScalarized,
// while chain inputs that escape that web are collected in InstForBrd and
// replaced in their chain users by an explicit lane-0 broadcast.
// Returns true if any broadcast was inserted (i.e. the IR changed).
bool BlockMemOpAddrScalarizationPass::scalarizeAddrArithmForBlockRdWr(GenIntrinsicInst *BlockInstr)
{
    bool Scalarized = false;
    // Operand 0 of the block intrinsic is the address being scalarized.
    Instruction *AddrInstr = dyn_cast<Instruction>(BlockInstr->getOperand(0));
    if (!AddrInstr)
        return Scalarized;

    // This map will contain instructions (keys) that will be broadcast, and instructions (values) where the result of the broadcast will be used.
    DenseMap<Instruction*, SmallVector<Instruction*, 4>> InstForBrd;

    SmallVector<Instruction*, 2> V = {AddrInstr};
    // This vector contains the root instruction that was checked in previous steps and its operands that will be checked in the current step.
    SmallVector<std::tuple<Instruction*, SmallVector<Instruction*, 2>>, 2> InstrVector = {{BlockInstr, V}};
    // Phase 1: BFS up the def chain, one "generation" per outer iteration.
    while (InstrVector.size()) {
        // Data structure for further iterations.
        SmallVector<std::tuple<Instruction*, SmallVector<Instruction*, 2>>, 2> NewInstrVector;
        for (const auto &T : InstrVector) {
            Instruction *Root = std::get<0>(T);
            for (Instruction *I : std::get<1>(T)) {
                std::tuple<Instruction*, SmallVector<Instruction*, 2>> NewTuple = {I, SmallVector<Instruction*, 2>()};
                // Already proven scalarizable on an earlier walk — skip.
                if (InstCanBeScalarized.count(I))
                    continue;

                // Check I instruction and its users.
                if (canInstBeScalarized(I, Root)) {
                    InstCanBeScalarized.insert(I);

                    // Now lets check its arguments.
                    for (auto Op = I->op_begin(), E = I->op_end(); Op != E; Op++) {
                        if (Instruction *InOp = dyn_cast<Instruction>(*Op)) {
                            // Uniform operands are already lane-invariant;
                            // no broadcast needed for them.
                            if (WI->isUniform(InOp))
                                continue;

                            std::get<1>(NewTuple).push_back(InOp);
                        }
                    }

                    NewInstrVector.push_back(NewTuple);
                } else {
                    // Terminate the algorithm if the address is NOT used in any instructions other than BlockWrite/BlockRead.
                    if (I == BlockInstr->getOperand(0))
                        return Scalarized;

                    // I escapes the address web: schedule a broadcast of I
                    // and remember Root as a consumer to rewrite later.
                    if (InstForBrd.count(I)) {
                        if (std::find(InstForBrd[I].begin(), InstForBrd[I].end(), Root) != InstForBrd[I].end())
                            continue;

                        InstForBrd[I].push_back(Root);
                    } else {
                        InstForBrd.insert({I, {Root}});
                    }
                }
            }
        }
        // Update instructions list for next check.
        InstrVector = NewInstrVector;
    }

    // Phase 2: insert broadcast instructions and rewrite consumers.
    for (const auto &Item : InstForBrd) {
        // NOTE(review): this local deliberately reuses the name of the
        // InstForBrd map above, shadowing it for the rest of the loop body —
        // consider renaming for clarity.
        Instruction *InstForBrd = Item.first;

        // Never broadcast a value that is already a broadcast.
        if (GenIntrinsicInst *GenInst = dyn_cast<GenIntrinsicInst>(InstForBrd)) {
            GenISAIntrinsic::ID Id = GenInst->getIntrinsicID();
            if (Id == GenISAIntrinsic::GenISA_WaveShuffleIndex)
                continue;
        }

        Value *BroadcastInstr = insertBroadcast(InstForBrd);
        if (!BroadcastInstr)
            continue;
        Scalarized = true;

        // Point each recorded consumer's matching operand at the broadcast.
        for (auto Root : Item.second) {
            size_t ArgNum = 0;
            for (auto Op = Root->op_begin(), E = Root->op_end(); Op != E; Op++) {
                if (dyn_cast<Instruction>(Op) == InstForBrd) {
                    Root->setOperand(ArgNum, BroadcastInstr);
                    break;
                }
                ArgNum++;
            }
        }
    }

    return Scalarized;
}
0 commit comments