@@ -27,6 +27,20 @@ static const int MaxVecSize = 4;
27
27
28
28
using namespace llvm ;
29
29
30
+ // Recursively creates an array-like version of a given vector type.
31
+ static Type *equivalentArrayTypeFromVector (Type *T) {
32
+ if (auto *VecTy = dyn_cast<VectorType>(T))
33
+ return ArrayType::get (VecTy->getElementType (),
34
+ dyn_cast<FixedVectorType>(VecTy)->getNumElements ());
35
+ if (auto *ArrayTy = dyn_cast<ArrayType>(T)) {
36
+ Type *NewElementType =
37
+ equivalentArrayTypeFromVector (ArrayTy->getElementType ());
38
+ return ArrayType::get (NewElementType, ArrayTy->getNumElements ());
39
+ }
40
+ // If it's not a vector or array, return the original type.
41
+ return T;
42
+ }
43
+
30
44
class DXILDataScalarizationLegacy : public ModulePass {
31
45
32
46
public:
@@ -54,8 +68,8 @@ class DataScalarizerVisitor : public InstVisitor<DataScalarizerVisitor, bool> {
54
68
bool visitGetElementPtrInst (GetElementPtrInst &GEPI);
55
69
bool visitCastInst (CastInst &CI) { return false ; }
56
70
bool visitBitCastInst (BitCastInst &BCI) { return false ; }
57
- bool visitInsertElementInst (InsertElementInst &IEI) { return false ; }
58
- bool visitExtractElementInst (ExtractElementInst &EEI) { return false ; }
71
+ bool visitInsertElementInst (InsertElementInst &IEI);
72
+ bool visitExtractElementInst (ExtractElementInst &EEI);
59
73
bool visitShuffleVectorInst (ShuffleVectorInst &SVI) { return false ; }
60
74
bool visitPHINode (PHINode &PHI) { return false ; }
61
75
bool visitLoadInst (LoadInst &LI);
@@ -65,6 +79,16 @@ class DataScalarizerVisitor : public InstVisitor<DataScalarizerVisitor, bool> {
65
79
friend bool findAndReplaceVectors (llvm::Module &M);
66
80
67
81
private:
82
+ typedef std::pair<AllocaInst *, SmallVector<Value *, 4 >> AllocaAndGEPs;
83
+ typedef SmallDenseMap<Value *, AllocaAndGEPs>
84
+ VectorToArrayMap; // A map from a vector-typed Value to its corresponding
85
+ // AllocaInst and GEPs to each element of an array
86
+ VectorToArrayMap VectorAllocaMap;
87
+ AllocaAndGEPs createArrayFromVector (IRBuilder<> &Builder, Value *Vec,
88
+ const Twine &Name);
89
+ bool replaceDynamicInsertElementInst (InsertElementInst &IEI);
90
+ bool replaceDynamicExtractElementInst (ExtractElementInst &EEI);
91
+
68
92
GlobalVariable *lookupReplacementGlobal (Value *CurrOperand);
69
93
DenseMap<GlobalVariable *, GlobalVariable *> GlobalMap;
70
94
};
@@ -76,6 +100,7 @@ bool DataScalarizerVisitor::visit(Function &F) {
76
100
for (Instruction &I : make_early_inc_range (*BB))
77
101
MadeChange |= InstVisitor::visit (I);
78
102
}
103
+ VectorAllocaMap.clear ();
79
104
return MadeChange;
80
105
}
81
106
@@ -90,20 +115,6 @@ DataScalarizerVisitor::lookupReplacementGlobal(Value *CurrOperand) {
90
115
return nullptr ; // Not found
91
116
}
92
117
93
- // Recursively creates an array version of the given vector type.
94
- static Type *replaceVectorWithArray (Type *T, LLVMContext &Ctx) {
95
- if (auto *VecTy = dyn_cast<VectorType>(T))
96
- return ArrayType::get (VecTy->getElementType (),
97
- dyn_cast<FixedVectorType>(VecTy)->getNumElements ());
98
- if (auto *ArrayTy = dyn_cast<ArrayType>(T)) {
99
- Type *NewElementType =
100
- replaceVectorWithArray (ArrayTy->getElementType (), Ctx);
101
- return ArrayType::get (NewElementType, ArrayTy->getNumElements ());
102
- }
103
- // If it's not a vector or array, return the original type.
104
- return T;
105
- }
106
-
107
118
static bool isArrayOfVectors (Type *T) {
108
119
if (ArrayType *ArrType = dyn_cast<ArrayType>(T))
109
120
return isa<VectorType>(ArrType->getElementType ());
@@ -116,8 +127,7 @@ bool DataScalarizerVisitor::visitAllocaInst(AllocaInst &AI) {
116
127
117
128
ArrayType *ArrType = cast<ArrayType>(AI.getAllocatedType ());
118
129
IRBuilder<> Builder (&AI);
119
- LLVMContext &Ctx = AI.getContext ();
120
- Type *NewType = replaceVectorWithArray (ArrType, Ctx);
130
+ Type *NewType = equivalentArrayTypeFromVector (ArrType);
121
131
AllocaInst *ArrAlloca =
122
132
Builder.CreateAlloca (NewType, nullptr , AI.getName () + " .scalarize" );
123
133
ArrAlloca->setAlignment (AI.getAlign ());
@@ -173,6 +183,124 @@ bool DataScalarizerVisitor::visitStoreInst(StoreInst &SI) {
173
183
return false ;
174
184
}
175
185
186
+ DataScalarizerVisitor::AllocaAndGEPs
187
+ DataScalarizerVisitor::createArrayFromVector (IRBuilder<> &Builder, Value *Vec,
188
+ const Twine &Name = " " ) {
189
+ // If there is already an alloca for this vector, return it
190
+ if (VectorAllocaMap.contains (Vec))
191
+ return VectorAllocaMap[Vec];
192
+
193
+ auto InsertPoint = Builder.GetInsertPoint ();
194
+
195
+ // Allocate the array to hold the vector elements
196
+ Builder.SetInsertPointPastAllocas (Builder.GetInsertBlock ()->getParent ());
197
+ Type *ArrTy = equivalentArrayTypeFromVector (Vec->getType ());
198
+ AllocaInst *ArrAlloca =
199
+ Builder.CreateAlloca (ArrTy, nullptr , Name + " .alloca" );
200
+ const uint64_t ArrNumElems = ArrTy->getArrayNumElements ();
201
+
202
+ // Create loads and stores to populate the array immediately after the
203
+ // original vector's defining instruction if available, else immediately after
204
+ // the alloca
205
+ if (auto *Instr = dyn_cast<Instruction>(Vec))
206
+ Builder.SetInsertPoint (Instr->getNextNonDebugInstruction ());
207
+ SmallVector<Value *, 4 > GEPs (ArrNumElems);
208
+ for (unsigned I = 0 ; I < ArrNumElems; ++I) {
209
+ Value *EE = Builder.CreateExtractElement (Vec, I, Name + " .extract" );
210
+ GEPs[I] = Builder.CreateInBoundsGEP (
211
+ ArrTy, ArrAlloca, {Builder.getInt32 (0 ), Builder.getInt32 (I)},
212
+ Name + " .index" );
213
+ Builder.CreateStore (EE, GEPs[I]);
214
+ }
215
+
216
+ VectorAllocaMap.insert ({Vec, {ArrAlloca, GEPs}});
217
+ Builder.SetInsertPoint (InsertPoint);
218
+ return {ArrAlloca, GEPs};
219
+ }
220
+
221
+ // / Returns a pair of Value* with the first being a GEP into ArrAlloca using
222
+ // / indices {0, Index}, and the second Value* being a Load of the GEP
223
+ static std::pair<Value *, Value *>
224
+ dynamicallyLoadArray (IRBuilder<> &Builder, AllocaInst *ArrAlloca, Value *Index,
225
+ const Twine &Name = " " ) {
226
+ Type *ArrTy = ArrAlloca->getAllocatedType ();
227
+ Value *GEP = Builder.CreateInBoundsGEP (
228
+ ArrTy, ArrAlloca, {Builder.getInt32 (0 ), Index}, Name + " .index" );
229
+ Value *Load =
230
+ Builder.CreateLoad (ArrTy->getArrayElementType (), GEP, Name + " .load" );
231
+ return std::make_pair (GEP, Load);
232
+ }
233
+
234
+ bool DataScalarizerVisitor::replaceDynamicInsertElementInst (
235
+ InsertElementInst &IEI) {
236
+ IRBuilder<> Builder (&IEI);
237
+
238
+ Value *Vec = IEI.getOperand (0 );
239
+ Value *Val = IEI.getOperand (1 );
240
+ Value *Index = IEI.getOperand (2 );
241
+
242
+ AllocaAndGEPs ArrAllocaAndGEPs =
243
+ createArrayFromVector (Builder, Vec, IEI.getName ());
244
+ AllocaInst *ArrAlloca = ArrAllocaAndGEPs.first ;
245
+ Type *ArrTy = ArrAlloca->getAllocatedType ();
246
+ SmallVector<Value *, 4 > &ArrGEPs = ArrAllocaAndGEPs.second ;
247
+
248
+ auto GEPAndLoad =
249
+ dynamicallyLoadArray (Builder, ArrAlloca, Index, IEI.getName ());
250
+ Value *GEP = GEPAndLoad.first ;
251
+ Value *Load = GEPAndLoad.second ;
252
+
253
+ Builder.CreateStore (Val, GEP);
254
+ Value *NewIEI = PoisonValue::get (Vec->getType ());
255
+ for (unsigned I = 0 ; I < ArrTy->getArrayNumElements (); ++I) {
256
+ Value *Load = Builder.CreateLoad (ArrTy->getArrayElementType (), ArrGEPs[I],
257
+ IEI.getName () + " .load" );
258
+ NewIEI = Builder.CreateInsertElement (NewIEI, Load, Builder.getInt32 (I),
259
+ IEI.getName () + " .insert" );
260
+ }
261
+
262
+ // Store back the original value so the Alloca can be reused for subsequent
263
+ // insertelement instructions on the same vector
264
+ Builder.CreateStore (Load, GEP);
265
+
266
+ IEI.replaceAllUsesWith (NewIEI);
267
+ IEI.eraseFromParent ();
268
+ return true ;
269
+ }
270
+
271
+ bool DataScalarizerVisitor::visitInsertElementInst (InsertElementInst &IEI) {
272
+ // If the index is a constant then we don't need to scalarize it
273
+ Value *Index = IEI.getOperand (2 );
274
+ if (isa<ConstantInt>(Index))
275
+ return false ;
276
+ return replaceDynamicInsertElementInst (IEI);
277
+ }
278
+
279
+ bool DataScalarizerVisitor::replaceDynamicExtractElementInst (
280
+ ExtractElementInst &EEI) {
281
+ IRBuilder<> Builder (&EEI);
282
+
283
+ AllocaAndGEPs ArrAllocaAndGEPs =
284
+ createArrayFromVector (Builder, EEI.getVectorOperand (), EEI.getName ());
285
+ AllocaInst *ArrAlloca = ArrAllocaAndGEPs.first ;
286
+
287
+ auto GEPAndLoad = dynamicallyLoadArray (Builder, ArrAlloca,
288
+ EEI.getIndexOperand (), EEI.getName ());
289
+ Value *Load = GEPAndLoad.second ;
290
+
291
+ EEI.replaceAllUsesWith (Load);
292
+ EEI.eraseFromParent ();
293
+ return true ;
294
+ }
295
+
296
+ bool DataScalarizerVisitor::visitExtractElementInst (ExtractElementInst &EEI) {
297
+ // If the index is a constant then we don't need to scalarize it
298
+ Value *Index = EEI.getIndexOperand ();
299
+ if (isa<ConstantInt>(Index))
300
+ return false ;
301
+ return replaceDynamicExtractElementInst (EEI);
302
+ }
303
+
176
304
bool DataScalarizerVisitor::visitGetElementPtrInst (GetElementPtrInst &GEPI) {
177
305
178
306
unsigned NumOperands = GEPI.getNumOperands ();
@@ -197,8 +325,8 @@ bool DataScalarizerVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
197
325
return true ;
198
326
}
199
327
200
- Constant *transformInitializer (Constant *Init, Type *OrigType, Type *NewType ,
201
- LLVMContext &Ctx) {
328
+ static Constant *transformInitializer (Constant *Init, Type *OrigType,
329
+ Type *NewType, LLVMContext &Ctx) {
202
330
// Handle ConstantAggregateZero (zero-initialized constants)
203
331
if (isa<ConstantAggregateZero>(Init)) {
204
332
return ConstantAggregateZero::get (NewType);
@@ -257,7 +385,7 @@ static bool findAndReplaceVectors(Module &M) {
257
385
for (GlobalVariable &G : M.globals ()) {
258
386
Type *OrigType = G.getValueType ();
259
387
260
- Type *NewType = replaceVectorWithArray (OrigType, Ctx );
388
+ Type *NewType = equivalentArrayTypeFromVector (OrigType);
261
389
if (OrigType != NewType) {
262
390
// Create a new global variable with the updated type
263
391
// Note: Initializer is set via transformInitializer
0 commit comments