1
- #include < llvm/CodeGen/MachineBasicBlock.h>
2
- #include < llvm/CodeGen/MachineBranchProbabilityInfo.h>
3
- #include < llvm/CodeGen/MachineDominators.h>
4
- #include < llvm/CodeGen/MachineFunctionPass.h>
5
- #include < llvm/CodeGen/MachineLoopInfo.h>
6
- #include < llvm/CodeGen/SSAIfConv.h>
7
- #include < llvm/CodeGen/TargetInstrInfo.h>
8
- #include < llvm/CodeGen/TargetRegisterInfo.h>
9
- #include < llvm/CodeGen/TargetSchedule.h>
10
- #include < llvm/CodeGen/TargetSubtargetInfo.h>
11
- #include < llvm/InitializePasses.h>
1
+ #include " llvm/CodeGen/MachineFunctionPass.h"
2
+ #include " llvm/CodeGen/MachineRegisterInfo.h"
3
+ #include " llvm/CodeGen/SSAIfConv.h"
12
4
13
5
#include " AMDGPU.h"
6
+ #include " GCNSubtarget.h"
7
+ #include " SIInstrInfo.h"
8
+ #include " SIMachineFunctionInfo.h"
14
9
15
10
using namespace llvm ;
16
11
17
- namespace {
18
12
#define DEBUG_TYPE " amdgpu-if-cvt"
19
- const char PassName[] = " AMDGPU if conversion" ;
20
-
21
- class AMDGPUIfConverter : public MachineFunctionPass {
22
- const TargetInstrInfo *TII = nullptr ;
23
- const TargetRegisterInfo *TRI = nullptr ;
24
- TargetSchedModel SchedModel;
25
- MachineRegisterInfo *MRI = nullptr ;
26
- MachineDominatorTree *DomTree = nullptr ;
27
- MachineBranchProbabilityInfo *MBPI = nullptr ;
28
- MachineLoopInfo *Loops = nullptr ;
29
-
30
- static constexpr unsigned BlockInstrLimit = 30 ;
31
- static constexpr bool Stress = false ;
32
- SSAIfConv IfConv{DEBUG_TYPE, BlockInstrLimit, Stress};
33
-
34
- public:
35
- static char ID;
36
13
37
- AMDGPUIfConverter () : MachineFunctionPass(ID) {}
14
+ namespace {
15
+ unsigned getReversedVCMPXOpcode (unsigned Opcode) {
16
+ // TODO: this is a placeholder for the real function
17
+ switch (Opcode) {
18
+ case AMDGPU::V_CMPX_LT_I32_nosdst_e64:
19
+ return AMDGPU::V_CMPX_GE_I32_nosdst_e64;
20
+ default :
21
+ errs () << " unhandled: " << Opcode << " \n " ;
22
+ llvm_unreachable (" unhandled vcmp opcode" );
23
+ }
24
+ }
38
25
39
- bool runOnMachineFunction (MachineFunction &MF) override ;
26
+ bool needsExecPredication (const SIInstrInfo *TII, const MachineInstr &I) {
27
+ return TII->isVALU (I) || TII->isVMEM (I);
28
+ }
40
29
41
- void getAnalysisUsage (AnalysisUsage &AU) const override ;
30
+ struct ExecPredicate : SSAIfConv::PredicationStrategyBase {
31
+ const SIInstrInfo *TII;
32
+ const SIRegisterInfo *RegInfo;
33
+
34
+ MachineInstr *Cmp = nullptr ;
35
+
36
+ ExecPredicate (const SIInstrInfo *TII)
37
+ : TII(TII), RegInfo(&TII->getRegisterInfo ()) {}
38
+
39
+ bool canConvertIf (MachineBasicBlock *Head, MachineBasicBlock *TBB,
40
+ MachineBasicBlock *FBB, MachineBasicBlock *Tail,
41
+ ArrayRef<MachineOperand> Cond) override {
42
+
43
+ // check that the cmp is just before the branch and that it is promotable to
44
+ // v_cmpx
45
+ const unsigned SupportedBranchOpc[]{
46
+ AMDGPU::S_CBRANCH_SCC0, AMDGPU::S_CBRANCH_SCC1, AMDGPU::S_CBRANCH_VCCNZ,
47
+ AMDGPU::S_CBRANCH_VCCZ};
42
48
43
- bool tryConvertIf (MachineBasicBlock *);
49
+ MachineInstr &CBranch = *Head->getFirstInstrTerminator ();
50
+ if (!llvm::is_contained (SupportedBranchOpc, CBranch.getOpcode ()))
51
+ return false ;
52
+
53
+ auto CmpInstr = std::next (CBranch.getReverseIterator ());
54
+ if (CmpInstr == Head->instr_rend ())
55
+ return false ;
56
+
57
+ Register SCCorVCC = Cond[1 ].getReg ();
58
+ bool ModifiesConditionReg = CmpInstr->modifiesRegister (SCCorVCC, RegInfo);
59
+ if (!ModifiesConditionReg)
60
+ return false ;
61
+
62
+ Cmp = &*CmpInstr;
63
+
64
+ unsigned CmpOpc = Cmp->getOpcode ();
65
+ if (TII->isSALU (*Cmp))
66
+ CmpOpc = TII->getVALUOp (*Cmp);
67
+ if (AMDGPU::getVCMPXOpFromVCMP (CmpOpc) == -1 ) {
68
+ errs () << " unhandled branch " << *Cmp << " \n " ;
69
+ return false ;
70
+ }
71
+
72
+ return true ;
73
+ }
74
+
75
+ bool canPredicateInstr (const MachineInstr &I) override {
76
+
77
+ // TODO: relax this condition, if exec is masked, check that it goes back to
78
+ // normal
79
+ // TODO: what about scc or vcc ? Are they taken into acount in the MBB
80
+ // live-ins ?
81
+ MCRegister Exec = RegInfo->getExec ();
82
+ bool ModifiesExec = I.modifiesRegister (Exec, RegInfo);
83
+ if (ModifiesExec)
84
+ return false ;
85
+
86
+ if (needsExecPredication (TII, I))
87
+ return true ;
88
+
89
+ bool DontMoveAcrossStore = true ;
90
+ bool IsSpeculatable = I.isDereferenceableInvariantLoad () ||
91
+ I.isSafeToMove (DontMoveAcrossStore);
92
+ if (IsSpeculatable)
93
+ return true ;
94
+
95
+ return false ;
96
+ }
44
97
98
+ bool shouldConvertIf (SSAIfConv &IfConv) override {
99
+ // TODO: cost model
100
+ return true ;
101
+ }
102
+
103
+ void predicateBlock (MachineBasicBlock *MBB, ArrayRef<MachineOperand> Cond,
104
+ bool Reverse) override {
105
+ // save exec
106
+ MachineFunction &MF = *MBB->getParent ();
107
+ SIMachineFunctionInfo *MFI = MF.getInfo <SIMachineFunctionInfo>();
108
+
109
+ Register ExecBackup = MFI->getSGPRForEXECCopy ();
110
+
111
+ const DebugLoc &CmpLoc = Cmp->getDebugLoc ();
112
+
113
+ auto FirstInstruction = MBB->begin ();
114
+ const bool IsSCCLive =
115
+ false ; // asume not since the live-ins are supposed to be empty
116
+ TII->insertScratchExecCopy (MF, *MBB, FirstInstruction, CmpLoc, ExecBackup,
117
+ IsSCCLive);
118
+
119
+ // mask exec
120
+ unsigned CmpOpc = Cmp->getOpcode ();
121
+ if (TII->isSALU (*Cmp))
122
+ CmpOpc = TII->getVALUOp (*Cmp);
123
+
124
+ CmpOpc = AMDGPU::getVCMPXOpFromVCMP (CmpOpc);
125
+ if (Reverse)
126
+ CmpOpc = getReversedVCMPXOpcode (CmpOpc);
127
+
128
+ // TODO: handle this properly. The second block may kill those registers.
129
+ Cmp->getOperand (0 ).setIsKill (false );
130
+ Cmp->getOperand (1 ).setIsKill (false );
131
+
132
+ auto VCmpX = BuildMI (*MBB, FirstInstruction, CmpLoc, TII->get (CmpOpc));
133
+ VCmpX->addOperand (Cmp->getOperand (0 ));
134
+ VCmpX->addOperand (Cmp->getOperand (1 ));
135
+
136
+ // restore exec
137
+ TII->restoreExec (MF, *MBB, MBB->end (), DebugLoc (), ExecBackup);
138
+ }
139
+
140
+ ~ExecPredicate () override = default ;
141
+ };
142
+
143
+ const char PassName[] = " AMDGPU If Conversion" ;
144
+
145
+ struct AMDGPUIfConverter : MachineFunctionPass {
146
+ static char ID;
147
+ AMDGPUIfConverter () : MachineFunctionPass(ID) {}
148
+ void getAnalysisUsage (AnalysisUsage &AU) const override ;
149
+ bool runOnMachineFunction (MachineFunction &MF) override ;
45
150
StringRef getPassName () const override { return PassName; }
46
151
};
47
152
48
153
char AMDGPUIfConverter::ID = 0 ;
49
154
50
155
void AMDGPUIfConverter::getAnalysisUsage (AnalysisUsage &AU) const {
51
- AU.addRequired <MachineBranchProbabilityInfoWrapperPass>();
52
156
AU.addRequired <MachineDominatorTreeWrapperPass>();
53
157
AU.addPreserved <MachineDominatorTreeWrapperPass>();
54
158
AU.addRequired <MachineLoopInfoWrapperPass>();
@@ -60,29 +164,19 @@ bool AMDGPUIfConverter::runOnMachineFunction(MachineFunction &MF) {
60
164
if (skipFunction (MF.getFunction ()))
61
165
return false ;
62
166
63
- const TargetSubtargetInfo &STI = MF.getSubtarget ();
64
- TII = STI.getInstrInfo ();
65
- TRI = STI.getRegisterInfo ();
66
- MRI = &MF.getRegInfo ();
67
- SchedModel.init (&STI);
68
- DomTree = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree ();
69
- Loops = &getAnalysis<MachineLoopInfoWrapperPass>().getLI ();
70
- MBPI = &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI ();
71
-
72
- bool Changed = false ;
73
- IfConv.runOnMachineFunction (MF);
167
+ const auto &STI = MF.getSubtarget <GCNSubtarget>();
168
+ if (!STI.hasGFX10_3Insts ())
169
+ return false ;
74
170
75
- for ( auto *DomNode : post_order (DomTree))
76
- if ( tryConvertIf (DomNode-> getBlock ()))
77
- Changed = true ;
171
+ const SIInstrInfo *TII = STI. getInstrInfo ();
172
+ auto *DomTree = &getAnalysis<MachineDominatorTreeWrapperPass>(). getDomTree ();
173
+ auto *Loops = &getAnalysis<MachineLoopInfoWrapperPass>(). getLI () ;
78
174
79
- return Changed;
175
+ ExecPredicate Predicate (TII);
176
+ SSAIfConv IfConv (Predicate, MF, DomTree, Loops);
177
+ return IfConv.run ();
80
178
}
81
-
82
- bool AMDGPUIfConverter::tryConvertIf (MachineBasicBlock *MBB) { return false ; }
83
-
84
179
} // namespace
85
-
86
180
char &llvm::AMDGPUIfConverterID = AMDGPUIfConverter::ID;
87
181
INITIALIZE_PASS_BEGIN (AMDGPUIfConverter, DEBUG_TYPE, PassName, false , false )
88
182
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
0 commit comments