14
14
15
15
#include " AMDGPU.h"
16
16
#include " AMDGPUTargetMachine.h"
17
+ #include " AMDGPUTargetTransformInfo.h"
17
18
#include " llvm/Analysis/AssumptionCache.h"
18
19
#include " llvm/Analysis/UniformityAnalysis.h"
19
20
#include " llvm/Analysis/ValueTracking.h"
@@ -45,6 +46,7 @@ class AMDGPULateCodeGenPrepare
45
46
Function &F;
46
47
const DataLayout &DL;
47
48
const GCNSubtarget &ST;
49
+ const TargetTransformInfo &TTI;
48
50
49
51
AssumptionCache *const AC;
50
52
UniformityInfo &UA;
@@ -53,8 +55,9 @@ class AMDGPULateCodeGenPrepare
53
55
54
56
public:
55
57
AMDGPULateCodeGenPrepare (Function &F, const GCNSubtarget &ST,
56
- AssumptionCache *AC, UniformityInfo &UA)
57
- : F(F), DL(F.getDataLayout()), ST(ST), AC(AC), UA(UA) {}
58
+ const TargetTransformInfo &TTI, AssumptionCache *AC,
59
+ UniformityInfo &UA)
60
+ : F(F), DL(F.getDataLayout()), ST(ST), TTI(TTI), AC(AC), UA(UA) {}
58
61
bool run ();
59
62
bool visitInstruction (Instruction &) { return false ; }
60
63
@@ -75,6 +78,8 @@ class LiveRegOptimizer {
75
78
Module &Mod;
76
79
const DataLayout &DL;
77
80
const GCNSubtarget &ST;
81
+ const TargetTransformInfo &TTI;
82
+
78
83
/// The scalar type to convert to
79
84
Type *const ConvertToScalar;
80
85
/// The set of visited Instructions
@@ -125,8 +130,41 @@ class LiveRegOptimizer {
125
130
return LK.first != TargetLoweringBase::TypeLegal;
126
131
}
127
132
128
- LiveRegOptimizer (Module &Mod, const GCNSubtarget &ST)
129
- : Mod(Mod), DL(Mod.getDataLayout()), ST(ST),
133
+ // Filtering based on operation or its cost.
134
+ // If an operation incurs high enough cost or natively work on
135
+ // vector of illegal type, ie. v2i8, then it makes sense to try
136
+ // to avoid scalarizing across BB.
137
+ bool shouldReplaceBasedOnOp (Instruction *II) {
138
+ // Ignore pseudos
139
+ if (II->isDebugOrPseudoInst ())
140
+ return false ;
141
+
142
+ // Instruction Cost
143
+ const auto Cost = TTI.getInstructionCost (
144
+ II, TargetTransformInfo::TargetCostKind::TCK_SizeAndLatency);
145
+ LLVM_DEBUG (dbgs () << " shouldReplaceBasedOnOp: " << *II << " Cost=" << Cost
146
+ << ' \n ' ;);
147
+ if (Cost >= 8 )
148
+ return true ;
149
+
150
+ // Intrinsics - assume they natively handle illegal type
151
+ if (dyn_cast<IntrinsicInst>(II))
152
+ return true ;
153
+
154
+ // Stores
155
+ if (dyn_cast<StoreInst>(II))
156
+ return true ;
157
+
158
+ // Shuffles
159
+ if (dyn_cast<ShuffleVectorInst>(II))
160
+ return true ;
161
+
162
+ return false ;
163
+ }
164
+
165
+ LiveRegOptimizer (Module &Mod, const GCNSubtarget &ST,
166
+ const TargetTransformInfo &TTI)
167
+ : Mod(Mod), DL(Mod.getDataLayout()), ST(ST), TTI(TTI),
130
168
ConvertToScalar (Type::getInt32Ty(Mod.getContext())) {}
131
169
};
132
170
@@ -140,7 +178,7 @@ bool AMDGPULateCodeGenPrepare::run() {
140
178
// vectors to equivalent vectors of legal type (which are converted back
141
179
// before uses in subsequent blocks), to pack the bits into fewer physical
142
180
// registers (used in CopyToReg/CopyFromReg pairs).
143
- LiveRegOptimizer LRO (*F.getParent (), ST);
181
+ LiveRegOptimizer LRO (*F.getParent (), ST, TTI );
144
182
145
183
bool Changed = false ;
146
184
@@ -259,6 +297,9 @@ bool LiveRegOptimizer::optimizeLiveType(
259
297
if (!shouldReplace (II->getType ()))
260
298
continue ;
261
299
300
+ if (!shouldReplaceBasedOnOp (II))
301
+ continue ;
302
+
262
303
if (PHINode *Phi = dyn_cast<PHINode>(II)) {
263
304
PhiNodes.insert (Phi);
264
305
// Collect all the incoming values of problematic PHI nodes.
@@ -478,11 +519,12 @@ bool AMDGPULateCodeGenPrepare::visitLoadInst(LoadInst &LI) {
478
519
PreservedAnalyses
479
520
AMDGPULateCodeGenPreparePass::run (Function &F, FunctionAnalysisManager &FAM) {
480
521
const GCNSubtarget &ST = TM.getSubtarget <GCNSubtarget>(F);
522
+ const TargetTransformInfo &TTI = TM.getTargetTransformInfo (F);
481
523
482
524
AssumptionCache &AC = FAM.getResult <AssumptionAnalysis>(F);
483
525
UniformityInfo &UI = FAM.getResult <UniformityInfoAnalysis>(F);
484
526
485
- bool Changed = AMDGPULateCodeGenPrepare (F, ST, &AC, UI).run ();
527
+ bool Changed = AMDGPULateCodeGenPrepare (F, ST, TTI, &AC, UI).run ();
486
528
487
529
if (!Changed)
488
530
return PreservedAnalyses::all ();
@@ -518,13 +560,14 @@ bool AMDGPULateCodeGenPrepareLegacy::runOnFunction(Function &F) {
518
560
const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
519
561
const TargetMachine &TM = TPC.getTM <TargetMachine>();
520
562
const GCNSubtarget &ST = TM.getSubtarget <GCNSubtarget>(F);
563
+ const TargetTransformInfo &TTI = TM.getTargetTransformInfo (F);
521
564
522
565
AssumptionCache &AC =
523
566
getAnalysis<AssumptionCacheTracker>().getAssumptionCache (F);
524
567
UniformityInfo &UI =
525
568
getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo ();
526
569
527
- return AMDGPULateCodeGenPrepare (F, ST, &AC, UI).run ();
570
+ return AMDGPULateCodeGenPrepare (F, ST, TTI, &AC, UI).run ();
528
571
}
529
572
530
573
INITIALIZE_PASS_BEGIN (AMDGPULateCodeGenPrepareLegacy, DEBUG_TYPE,
0 commit comments