@@ -42,25 +42,25 @@ namespace {
 
 class AMDGPULateCodeGenPrepare
     : public InstVisitor<AMDGPULateCodeGenPrepare, bool> {
-  Module *Mod = nullptr;
-  const DataLayout *DL = nullptr;
+  Function &F;
+  const DataLayout &DL;
   const GCNSubtarget &ST;
 
-  AssumptionCache *AC = nullptr;
-  UniformityInfo *UA = nullptr;
+  AssumptionCache *const AC;
+  UniformityInfo &UA;
 
   SmallVector<WeakTrackingVH, 8> DeadInsts;
 
 public:
-  AMDGPULateCodeGenPrepare(Module &M, const GCNSubtarget &ST,
-                           AssumptionCache *AC, UniformityInfo *UA)
-      : Mod(&M), DL(&M.getDataLayout()), ST(ST), AC(AC), UA(UA) {}
-  bool run(Function &F);
+  AMDGPULateCodeGenPrepare(Function &F, const GCNSubtarget &ST,
+                           AssumptionCache *AC, UniformityInfo &UA)
+      : F(F), DL(F.getDataLayout()), ST(ST), AC(AC), UA(UA) {}
+  bool run();
 
   bool visitInstruction(Instruction &) { return false; }
 
   // Check if the specified value is at least DWORD aligned.
   bool isDWORDAligned(const Value *V) const {
-    KnownBits Known = computeKnownBits(V, *DL, 0, AC);
+    KnownBits Known = computeKnownBits(V, DL, 0, AC);
     return Known.countMinTrailingZeros() >= 2;
   }
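A note on the `isDWORDAligned` check above: KnownBits tracks which bits of a value are provably zero, and two known-zero low bits mean the address is a multiple of 4. A minimal standalone sketch of that arithmetic with a concrete address (plain C++, not the LLVM KnownBits API, which proves bits symbolically rather than from a runtime value):

```cpp
#include <cassert>
#include <cstdint>

// Counts provably-zero low bits of a concrete value, mimicking what
// KnownBits::countMinTrailingZeros() reports for a symbolic one.
static unsigned countTrailingZeros(uint64_t Addr) {
  unsigned N = 0;
  for (; N < 64 && (Addr & 1) == 0; ++N)
    Addr >>= 1;
  return N;
}

// >= 2 trailing zero bits means divisible by 4, i.e. DWORD aligned.
static bool isDWORDAligned(uint64_t Addr) {
  return countTrailingZeros(Addr) >= 2;
}

int main() {
  assert(isDWORDAligned(0x1000));  // 4 KiB aligned: low 12 bits zero
  assert(!isDWORDAligned(0x1002)); // only one trailing zero bit
  return 0;
}
```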
@@ -72,11 +72,11 @@ using ValueToValueMap = DenseMap<const Value *, Value *>;
 
 class LiveRegOptimizer {
 private:
-  Module *Mod = nullptr;
-  const DataLayout *DL = nullptr;
-  const GCNSubtarget *ST;
+  Module &Mod;
+  const DataLayout &DL;
+  const GCNSubtarget &ST;
   /// The scalar type to convert to
-  Type *ConvertToScalar;
+  Type *const ConvertToScalar;
   /// The set of visited Instructions
   SmallPtrSet<Instruction *, 4> Visited;
   /// Map of Value -> Converted Value
@@ -110,7 +110,7 @@ class LiveRegOptimizer {
     if (!VTy)
       return false;
 
-    const auto *TLI = ST->getTargetLowering();
+    const auto *TLI = ST.getTargetLowering();
 
     Type *EltTy = VTy->getElementType();
     // If the element size is not less than the convert to scalar size, then we
@@ -125,23 +125,22 @@ class LiveRegOptimizer {
     return LK.first != TargetLoweringBase::TypeLegal;
   }
 
-  LiveRegOptimizer(Module *Mod, const GCNSubtarget *ST) : Mod(Mod), ST(ST) {
-    DL = &Mod->getDataLayout();
-    ConvertToScalar = Type::getInt32Ty(Mod->getContext());
-  }
+  LiveRegOptimizer(Module &Mod, const GCNSubtarget &ST)
+      : Mod(Mod), DL(Mod.getDataLayout()), ST(ST),
+        ConvertToScalar(Type::getInt32Ty(Mod.getContext())) {}
 };
 
 } // end anonymous namespace
 
-bool AMDGPULateCodeGenPrepare::run(Function &F) {
+bool AMDGPULateCodeGenPrepare::run() {
   // "Optimize" the virtual regs that cross basic block boundaries. When
   // building the SelectionDAG, vectors of illegal types that cross basic blocks
   // will be scalarized and widened, with each scalar living in its
   // own register. To work around this, this optimization converts the
   // vectors to equivalent vectors of legal type (which are converted back
   // before uses in subsequent blocks), to pack the bits into fewer physical
   // registers (used in CopyToReg/CopyFromReg pairs).
-  LiveRegOptimizer LRO(Mod, &ST);
+  LiveRegOptimizer LRO(*F.getParent(), ST);
 
   bool Changed = false;
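To make the comment in `run()` concrete: a `<4 x i8>` live across a block boundary would otherwise be scalarized into four widened registers, while bitcasting it to a single `i32` carries the same 32 bits in one register, with the lanes unpacked again before the uses in later blocks. A hedged sketch of that round trip in plain C++ (the pass emits IR bitcasts; the shifts here just spell out the lane layout, assuming little-endian lane order):

```cpp
#include <array>
#include <cstdint>
#include <cstdio>

// Pack four i8 lanes into one i32, the way a bitcast from <4 x i8> to i32
// lays them out on a little-endian target (lane 0 in the low byte).
static uint32_t packV4I8(const std::array<uint8_t, 4> &Lanes) {
  uint32_t Packed = 0;
  for (unsigned I = 0; I < 4; ++I)
    Packed |= uint32_t(Lanes[I]) << (8 * I);
  return Packed;
}

// Unpack on the other side of the "block boundary", mirroring the
// convert-back step the comment describes.
static std::array<uint8_t, 4> unpackV4I8(uint32_t Packed) {
  std::array<uint8_t, 4> Lanes{};
  for (unsigned I = 0; I < 4; ++I)
    Lanes[I] = uint8_t(Packed >> (8 * I));
  return Lanes;
}

int main() {
  std::array<uint8_t, 4> V{0x11, 0x22, 0x33, 0x44};
  uint32_t Reg = packV4I8(V);               // one register instead of four
  std::printf("%08x\n", (unsigned)Reg);     // prints 44332211
  return unpackV4I8(Reg) == V ? 0 : 1;      // round trip is lossless
}
```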
@@ -163,15 +162,15 @@ Type *LiveRegOptimizer::calculateConvertType(Type *OriginalType) {
   FixedVectorType *VTy = cast<FixedVectorType>(OriginalType);
 
-  TypeSize OriginalSize = DL->getTypeSizeInBits(VTy);
-  TypeSize ConvertScalarSize = DL->getTypeSizeInBits(ConvertToScalar);
+  TypeSize OriginalSize = DL.getTypeSizeInBits(VTy);
+  TypeSize ConvertScalarSize = DL.getTypeSizeInBits(ConvertToScalar);
   unsigned ConvertEltCount =
       (OriginalSize + ConvertScalarSize - 1) / ConvertScalarSize;
 
   if (OriginalSize <= ConvertScalarSize)
-    return IntegerType::get(Mod->getContext(), ConvertScalarSize);
+    return IntegerType::get(Mod.getContext(), ConvertScalarSize);
 
-  return VectorType::get(Type::getIntNTy(Mod->getContext(), ConvertScalarSize),
+  return VectorType::get(Type::getIntNTy(Mod.getContext(), ConvertScalarSize),
                          ConvertEltCount, false);
 }
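The `ConvertEltCount` computation in the hunk above is an integer ceiling division: enough 32-bit lanes to cover all of the original bits, with a plain scalar returned when one lane suffices. A small sketch of the arithmetic (the function name is illustrative, not from the pass):

```cpp
#include <cassert>

// ceil(OriginalBits / ScalarBits) without floating point, matching
// (OriginalSize + ConvertScalarSize - 1) / ConvertScalarSize above.
static unsigned convertEltCount(unsigned OriginalBits, unsigned ScalarBits) {
  return (OriginalBits + ScalarBits - 1) / ScalarBits;
}

int main() {
  assert(convertEltCount(24, 32) == 1); // <3 x i8>  -> i32 (scalar case)
  assert(convertEltCount(64, 32) == 2); // <4 x i16> -> <2 x i32>
  assert(convertEltCount(96, 32) == 3); // <6 x i16> -> <3 x i32>
  return 0;
}
```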
@@ -180,8 +179,8 @@ Value *LiveRegOptimizer::convertToOptType(Instruction *V,
   FixedVectorType *VTy = cast<FixedVectorType>(V->getType());
   Type *NewTy = calculateConvertType(V->getType());
 
-  TypeSize OriginalSize = DL->getTypeSizeInBits(VTy);
-  TypeSize NewSize = DL->getTypeSizeInBits(NewTy);
+  TypeSize OriginalSize = DL.getTypeSizeInBits(VTy);
+  TypeSize NewSize = DL.getTypeSizeInBits(NewTy);
 
   IRBuilder<> Builder(V->getParent(), InsertPt);
   // If there is a bitsize match, we can fit the old vector into a new vector of
@@ -210,8 +209,8 @@ Value *LiveRegOptimizer::convertFromOptType(Type *ConvertType, Instruction *V,
                                             BasicBlock *InsertBB) {
   FixedVectorType *NewVTy = cast<FixedVectorType>(ConvertType);
 
-  TypeSize OriginalSize = DL->getTypeSizeInBits(V->getType());
-  TypeSize NewSize = DL->getTypeSizeInBits(NewVTy);
+  TypeSize OriginalSize = DL.getTypeSizeInBits(V->getType());
+  TypeSize NewSize = DL.getTypeSizeInBits(NewVTy);
 
   IRBuilder<> Builder(InsertBB, InsertPt);
   // If there is a bitsize match, we simply convert back to the original type.
@@ -224,14 +223,14 @@ Value *LiveRegOptimizer::convertFromOptType(Type *ConvertType, Instruction *V,
   // For wide scalars, we can just truncate the value.
   if (!V->getType()->isVectorTy()) {
     Instruction *Trunc = cast<Instruction>(
-        Builder.CreateTrunc(V, IntegerType::get(Mod->getContext(), NewSize)));
+        Builder.CreateTrunc(V, IntegerType::get(Mod.getContext(), NewSize)));
     return cast<Instruction>(Builder.CreateBitCast(Trunc, NewVTy));
   }
 
   // For wider vectors, we must strip the MSBs to convert back to the original
   // type.
   VectorType *ExpandedVT = VectorType::get(
-      Type::getIntNTy(Mod->getContext(), NewVTy->getScalarSizeInBits()),
+      Type::getIntNTy(Mod.getContext(), NewVTy->getScalarSizeInBits()),
       (OriginalSize / NewVTy->getScalarSizeInBits()), false);
   Instruction *Converted =
       cast<Instruction>(Builder.CreateBitCast(V, ExpandedVT));
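The trunc-then-bitcast path above exists because the packed value can be wider than the original type: `<3 x i8>` holds 24 meaningful bits but travels as a 32-bit scalar, so the top byte must be dropped before reinterpreting the lanes. A toy illustration with concrete integers standing in for the IR values (assuming little-endian lane order):

```cpp
#include <cassert>
#include <cstdint>

int main() {
  // <3 x i8> packed into the low 24 bits of an i32; the top byte is
  // leftover widening garbage.
  uint32_t Packed = 0xAA332211;
  // The "trunc to i24" step: keep only the meaningful bits before the
  // bitcast back to the vector type.
  uint32_t Trunced = Packed & 0x00FFFFFF;
  assert((Trunced & 0xFF) == 0x11);         // lane 0
  assert(((Trunced >> 8) & 0xFF) == 0x22);  // lane 1
  assert(((Trunced >> 16) & 0xFF) == 0x33); // lane 2
  return 0;
}
```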
@@ -410,15 +409,15 @@ bool AMDGPULateCodeGenPrepare::canWidenScalarExtLoad(LoadInst &LI) const {
   // Skip aggregate types.
   if (Ty->isAggregateType())
     return false;
-  unsigned TySize = DL->getTypeStoreSize(Ty);
+  unsigned TySize = DL.getTypeStoreSize(Ty);
   // Only handle sub-DWORD loads.
   if (TySize >= 4)
     return false;
   // That load must be at least naturally aligned.
-  if (LI.getAlign() < DL->getABITypeAlign(Ty))
+  if (LI.getAlign() < DL.getABITypeAlign(Ty))
     return false;
   // It should be uniform, i.e. a scalar load.
-  return UA->isUniform(&LI);
+  return UA.isUniform(&LI);
 }
 
 bool AMDGPULateCodeGenPrepare::visitLoadInst(LoadInst &LI) {
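`canWidenScalarExtLoad` gates the rewrite touched in the next two hunks: a uniform, naturally aligned, sub-DWORD load is replaced by a full 32-bit load of the enclosing DWORD, and the requested value is recovered with a shift and truncate. A toy model of that recovery with concrete bytes (assumes a little-endian layout, as on AMDGPU; the pass does this with an IR LShr and Trunc):

```cpp
#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  // Memory holding a DWORD; the original IR wanted the i8 at byte offset 2.
  unsigned char Mem[4] = {0x11, 0x22, 0x33, 0x44};
  uint32_t Wide;
  std::memcpy(&Wide, Mem, 4); // the widened, DWORD-aligned i32 load
  unsigned ByteOff = 2;
  // Shift the wanted byte down and truncate, recovering the sub-DWORD value.
  uint8_t Val = uint8_t(Wide >> (8 * ByteOff));
  assert(Val == 0x33);
  return 0;
}
```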
@@ -435,7 +434,7 @@ bool AMDGPULateCodeGenPrepare::visitLoadInst(LoadInst &LI) {
 
   int64_t Offset = 0;
   auto *Base =
-      GetPointerBaseWithConstantOffset(LI.getPointerOperand(), Offset, *DL);
+      GetPointerBaseWithConstantOffset(LI.getPointerOperand(), Offset, DL);
   // If that base is not DWORD aligned, it's not safe to perform the following
   // transforms.
   if (!isDWORDAligned(Base))
@@ -452,7 +451,7 @@ bool AMDGPULateCodeGenPrepare::visitLoadInst(LoadInst &LI) {
   IRBuilder<> IRB(&LI);
   IRB.SetCurrentDebugLocation(LI.getDebugLoc());
 
-  unsigned LdBits = DL->getTypeStoreSizeInBits(LI.getType());
+  unsigned LdBits = DL.getTypeStoreSizeInBits(LI.getType());
   auto *IntNTy = Type::getIntNTy(LI.getContext(), LdBits);
 
   auto *NewPtr = IRB.CreateConstGEP1_64(
@@ -480,9 +479,7 @@ AMDGPULateCodeGenPreparePass::run(Function &F, FunctionAnalysisManager &FAM) {
   AssumptionCache &AC = FAM.getResult<AssumptionAnalysis>(F);
   UniformityInfo &UI = FAM.getResult<UniformityInfoAnalysis>(F);
 
-  AMDGPULateCodeGenPrepare Impl(*F.getParent(), ST, &AC, &UI);
-
-  bool Changed = Impl.run(F);
+  bool Changed = AMDGPULateCodeGenPrepare(F, ST, &AC, UI).run();
 
   if (!Changed)
     return PreservedAnalyses::all();
@@ -524,9 +521,7 @@ bool AMDGPULateCodeGenPrepareLegacy::runOnFunction(Function &F) {
   UniformityInfo &UI =
       getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();
 
-  AMDGPULateCodeGenPrepare Impl(*F.getParent(), ST, &AC, &UI);
-
-  return Impl.run(F);
+  return AMDGPULateCodeGenPrepare(F, ST, &AC, UI).run();
 }
 
 INITIALIZE_PASS_BEGIN(AMDGPULateCodeGenPrepareLegacy, DEBUG_TYPE,
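The shape of this whole commit is one refactor applied repeatedly: members that were pointers defaulted to `nullptr` become references (or `*const` pointers) bound once in the constructor initializer list, eliminating the null states and the `->`/`*` noise at use sites. A minimal sketch of the before/after pattern with stand-in types (names here are illustrative, not LLVM's):

```cpp
#include <cstdio>

// Stand-ins for the LLVM types.
struct DataLayout {
  unsigned getPointerSize() const { return 8; }
};
struct Fn {
  DataLayout DL;
  const DataLayout &getDataLayout() const { return DL; }
};

// Before: a pointer member defaulted to nullptr and bound in the constructor
// body; every use site dereferences and must trust the pointer is non-null.
class BeforePass {
  const DataLayout *DL = nullptr;
public:
  explicit BeforePass(Fn &F) { DL = &F.getDataLayout(); }
  unsigned ptrSize() const { return DL->getPointerSize(); }
};

// After: a reference member bound once in the initializer list; there is no
// null state to reason about, and use sites drop the indirection.
class AfterPass {
  const DataLayout &DL;
public:
  explicit AfterPass(Fn &F) : DL(F.getDataLayout()) {}
  unsigned ptrSize() const { return DL.getPointerSize(); }
};

int main() {
  Fn F;
  std::printf("%u %u\n", BeforePass(F).ptrSize(), AfterPass(F).ptrSize());
  return 0;
}
```

Note that `AC` stays a pointer in the diff (tightened to `*const`), presumably because `computeKnownBits` accepts a nullable AssumptionCache; the reference rewrite applies only where null was never a legitimate state.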