@@ -14,6 +14,7 @@
 
 #include "AMDGPU.h"
 #include "AMDGPUTargetMachine.h"
+#include "AMDGPUTargetTransformInfo.h"
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/UniformityAnalysis.h"
 #include "llvm/Analysis/ValueTracking.h"
@@ -45,6 +46,7 @@ class AMDGPULateCodeGenPrepare
   Function &F;
   const DataLayout &DL;
   const GCNSubtarget &ST;
+  const TargetTransformInfo &TTI;
 
   AssumptionCache *const AC;
   UniformityInfo &UA;
@@ -53,8 +55,9 @@ class AMDGPULateCodeGenPrepare
 
 public:
   AMDGPULateCodeGenPrepare(Function &F, const GCNSubtarget &ST,
+                           const TargetTransformInfo &TTI,
                            AssumptionCache *AC, UniformityInfo &UA)
-      : F(F), DL(F.getDataLayout()), ST(ST), AC(AC), UA(UA) {}
+      : F(F), DL(F.getDataLayout()), ST(ST), TTI(TTI), AC(AC), UA(UA) {}
   bool run();
   bool visitInstruction(Instruction &) { return false; }
 
@@ -75,6 +78,8 @@ class LiveRegOptimizer {
   Module &Mod;
   const DataLayout &DL;
   const GCNSubtarget &ST;
+  const TargetTransformInfo &TTI;
+
   /// The scalar type to convert to
   Type *const ConvertToScalar;
   /// The set of visited Instructions
@@ -125,8 +130,43 @@ class LiveRegOptimizer {
     return LK.first != TargetLoweringBase::TypeLegal;
   }
 
-  LiveRegOptimizer(Module &Mod, const GCNSubtarget &ST)
-      : Mod(Mod), DL(Mod.getDataLayout()), ST(ST),
+  // Filtering based on the operation or its cost.
+  // If an operation incurs a high enough cost or natively works on a
+  // vector of an illegal type, e.g. v2i8, then it makes sense to try
+  // to avoid scalarizing it across basic blocks.
+  bool shouldReplaceBasedOnOp(Instruction *II) {
+    // Ignore pseudos
+    if (II->isDebugOrPseudoInst())
+      return false;
+
+    // Instruction cost
+    const auto Cost = TTI.getInstructionCost(
+        II, TargetTransformInfo::TargetCostKind::TCK_SizeAndLatency);
+    LLVM_DEBUG(
+        dbgs() << "shouldReplaceBasedOnOp: " << *II
+               << " Cost=" << Cost << '\n';
+    );
+    if (Cost >= 8)
+      return true;
+
+    // Intrinsics - assume they natively handle illegal types
+    if (dyn_cast<IntrinsicInst>(II))
+      return true;
+
+    // Stores
+    if (dyn_cast<StoreInst>(II))
+      return true;
+
+    // Shuffles
+    if (dyn_cast<ShuffleVectorInst>(II))
+      return true;
+
+    return false;
+  }
+
+  LiveRegOptimizer(Module &Mod, const GCNSubtarget &ST,
+                   const TargetTransformInfo &TTI)
+      : Mod(Mod), DL(Mod.getDataLayout()), ST(ST), TTI(TTI),
       ConvertToScalar(Type::getInt32Ty(Mod.getContext())) {}
 };
 
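As a side note on the cost gate added above, here is a minimal standalone sketch, not part of the patch, of how the same TCK_SizeAndLatency query can be exercised in isolation. The helper name isExpensiveEnough and the explicit InstructionCost::isValid() check are assumptions added for illustration; the patch itself compares the cost directly against the threshold of 8.

// Sketch only (assumed helper, not in the patch): gate an instruction on the
// same size-and-latency cost heuristic used by shouldReplaceBasedOnOp above.
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Instruction.h"
#include "llvm/Support/InstructionCost.h"

using namespace llvm;

static bool isExpensiveEnough(const TargetTransformInfo &TTI,
                              const Instruction &I) {
  InstructionCost Cost =
      TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
  // Treat an invalid (unknown) cost conservatively as "not expensive";
  // the patch has no such check and compares the cost directly.
  return Cost.isValid() && Cost >= 8;
}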
@@ -140,7 +180,7 @@ bool AMDGPULateCodeGenPrepare::run() {
   // vectors to equivalent vectors of legal type (which are converted back
   // before uses in subsequent blocks), to pack the bits into fewer physical
   // registers (used in CopyToReg/CopyFromReg pairs).
-  LiveRegOptimizer LRO(*F.getParent(), ST);
+  LiveRegOptimizer LRO(*F.getParent(), ST, TTI);
 
   bool Changed = false;
 
@@ -259,6 +299,9 @@ bool LiveRegOptimizer::optimizeLiveType(
     if (!shouldReplace(II->getType()))
       continue;
 
+    if (!shouldReplaceBasedOnOp(II))
+      continue;
+
     if (PHINode *Phi = dyn_cast<PHINode>(II)) {
       PhiNodes.insert(Phi);
       // Collect all the incoming values of problematic PHI nodes.
@@ -478,11 +521,12 @@ bool AMDGPULateCodeGenPrepare::visitLoadInst(LoadInst &LI) {
 PreservedAnalyses
 AMDGPULateCodeGenPreparePass::run(Function &F, FunctionAnalysisManager &FAM) {
   const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
+  const TargetTransformInfo &TTI = TM.getTargetTransformInfo(F);
 
   AssumptionCache &AC = FAM.getResult<AssumptionAnalysis>(F);
   UniformityInfo &UI = FAM.getResult<UniformityInfoAnalysis>(F);
 
-  bool Changed = AMDGPULateCodeGenPrepare(F, ST, &AC, UI).run();
+  bool Changed = AMDGPULateCodeGenPrepare(F, ST, TTI, &AC, UI).run();
 
   if (!Changed)
     return PreservedAnalyses::all();
@@ -518,13 +562,14 @@ bool AMDGPULateCodeGenPrepareLegacy::runOnFunction(Function &F) {
   const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
   const TargetMachine &TM = TPC.getTM<TargetMachine>();
   const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
+  const TargetTransformInfo &TTI = TM.getTargetTransformInfo(F);
 
   AssumptionCache &AC =
       getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
   UniformityInfo &UI =
       getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();
 
-  return AMDGPULateCodeGenPrepare(F, ST, &AC, UI).run();
+  return AMDGPULateCodeGenPrepare(F, ST, TTI, &AC, UI).run();
 }
 
 INITIALIZE_PASS_BEGIN(AMDGPULateCodeGenPrepareLegacy, DEBUG_TYPE,
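Both entry points above pull TTI straight from the TargetMachine via TM.getTargetTransformInfo(F). For comparison, a hedged sketch of the alternative a new-pass-manager pass could use, pulling the same object from the analysis cache through TargetIRAnalysis; the pass name TTIConsumerPass is hypothetical and this is not what the patch does.

// Sketch only (hypothetical pass, not part of the patch): obtaining TTI
// through the new pass manager's analysis cache rather than the TargetMachine.
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/PassManager.h"

using namespace llvm;

struct TTIConsumerPass : PassInfoMixin<TTIConsumerPass> {
  PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM) {
    // TargetIRAnalysis is the analysis whose result type is TargetTransformInfo.
    const TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
    (void)TTI; // a real pass would issue cost queries here
    return PreservedAnalyses::all();
  }
};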