16
16
// ===----------------------------------------------------------------------===//
17
17
18
18
#include " AMDGPU.h"
19
- #include " SILowerI1Copies.h"
20
19
#include " llvm/CodeGen/MachineFunctionPass.h"
21
- #include " llvm/CodeGen/MachineUniformityAnalysis.h"
22
- #include " llvm/InitializePasses.h"
23
20
24
21
#define DEBUG_TYPE " amdgpu-global-isel-divergence-lowering"
25
22
@@ -45,152 +42,14 @@ class AMDGPUGlobalISelDivergenceLowering : public MachineFunctionPass {
45
42
46
43
// Declares this pass's analysis usage: marks the CFG as preserved, then
// defers to MachineFunctionPass::getAnalysisUsage for the remainder.
// The three addRequired<> lines carry the diff's '-' marker, i.e. they are on
// the removed side of this change (dominator tree, post-dominator tree and
// machine uniformity analysis are no longer requested).
void getAnalysisUsage (AnalysisUsage &AU) const override {
47
44
AU.setPreservesCFG ();
48
- AU.addRequired <MachineDominatorTree>();
49
- AU.addRequired <MachinePostDominatorTree>();
50
- AU.addRequired <MachineUniformityAnalysisPass>();
51
45
MachineFunctionPass::getAnalysisUsage (AU);
52
46
}
53
47
};
54
48
55
// PhiLoweringHelper subclass that additionally keeps a MachineUniformityInfo
// pointer (MUI) and overrides the helper hooks declared below.
// Every line of this class carries the diff's '-' marker (removed side).
- class DivergenceLoweringHelper : public PhiLoweringHelper {
56
- public:
57
// Takes the function plus dominator, post-dominator and uniformity info.
- DivergenceLoweringHelper (MachineFunction *MF, MachineDominatorTree *DT,
58
- MachinePostDominatorTree *PDT,
59
- MachineUniformityInfo *MUI);
60
-
61
- private:
62
// Uniformity info consulted via MUI->isDivergent() when selecting phis.
- MachineUniformityInfo *MUI = nullptr ;
63
-
64
- public:
65
// Overrides of the PhiLoweringHelper interface.
- void markAsLaneMask (Register DstReg) const override ;
66
- void getCandidatesForLowering (
67
- SmallVectorImpl<MachineInstr *> &Vreg1Phis) const override ;
68
- void collectIncomingValuesFromPhi (
69
- const MachineInstr *MI,
70
- SmallVectorImpl<Incoming> &Incomings) const override ;
71
- void replaceDstReg (Register NewReg, Register OldReg,
72
- MachineBasicBlock *MBB) override ;
73
- void buildMergeLaneMasks (MachineBasicBlock &MBB,
74
- MachineBasicBlock::iterator I, const DebugLoc &DL,
75
- Register DstReg, Register PrevReg,
76
- Register CurReg) override ;
77
- void constrainAsLaneMask (Incoming &In) override ;
78
- };
79
-
80
// Constructor: forwards MF, DT and PDT to the PhiLoweringHelper base and
// stores the uniformity info pointer in the MUI member. The body is empty.
- DivergenceLoweringHelper::DivergenceLoweringHelper (
81
- MachineFunction *MF, MachineDominatorTree *DT,
82
- MachinePostDominatorTree *PDT, MachineUniformityInfo *MUI)
83
- : PhiLoweringHelper(MF, DT, PDT), MUI(MUI) {}
84
-
85
- // _(s1) -> SReg_32/64(s1)
86
// Gives DstReg the register class returned by ST->getBoolRC().
// Asserts that DstReg has LLT s1. If the register already has a register
// class it is constrained to the bool class; otherwise the class is set.
- void DivergenceLoweringHelper::markAsLaneMask (Register DstReg) const {
87
- assert (MRI->getType (DstReg) == LLT::scalar (1 ));
88
-
89
// Register already has a class: constrain it instead of overwriting.
- if (MRI->getRegClassOrNull (DstReg)) {
90
- MRI->constrainRegClass (DstReg, ST->getBoolRC ());
91
- return ;
92
- }
93
-
94
// No class assigned yet: set it directly.
- MRI->setRegClass (DstReg, ST->getBoolRC ());
95
- }
96
-
97
// Appends to Vreg1Phis every phi in the function whose result register
// (operand 0) has type s1 and is reported divergent by MUI.
// Blocks are visited in function order and phis in block order.
- void DivergenceLoweringHelper::getCandidatesForLowering (
98
- SmallVectorImpl<MachineInstr *> &Vreg1Phis) const {
99
- LLT S1 = LLT::scalar (1 );
100
-
101
- // Add divergent i1 phis to the list
102
- for (MachineBasicBlock &MBB : *MF) {
103
- for (MachineInstr &MI : MBB.phis ()) {
104
- Register Dst = MI.getOperand (0 ).getReg ();
105
- if (MRI->getType (Dst) == S1 && MUI->isDivergent (Dst))
106
- Vreg1Phis.push_back (&MI);
107
- }
108
- }
109
- }
110
-
111
// Appends one Incoming entry per (value, block) operand pair of the phi MI.
// Operand 0 is skipped; from operand 1 onward the operands are read in pairs:
// operand i as a register and operand i + 1 as a basic block. The third
// constructor argument is a default-constructed Register.
- void DivergenceLoweringHelper::collectIncomingValuesFromPhi (
112
- const MachineInstr *MI, SmallVectorImpl<Incoming> &Incomings) const {
113
- for (unsigned i = 1 ; i < MI->getNumOperands (); i += 2 ) {
114
- Incomings.emplace_back (MI->getOperand (i).getReg (),
115
- MI->getOperand (i + 1 ).getMBB (), Register ());
116
- }
117
- }
118
-
119
// Inserts `OldReg = COPY NewReg` at the first non-PHI position of MBB,
// built with an empty ({}) debug location.
- void DivergenceLoweringHelper::replaceDstReg (Register NewReg, Register OldReg,
120
- MachineBasicBlock *MBB) {
121
- BuildMI (*MBB, MBB->getFirstNonPHI (), {}, TII->get (AMDGPU::COPY), OldReg)
122
- .addReg (NewReg);
123
- }
124
-
125
- // Get pointers to build instruction just after MI (skips phis if needed)
126
// Returns {parent block of MI, insertion iterator}. The iterator is the
// position following MI, advanced by SkipPHIsAndLabels.
// NOTE(review): MI is dereferenced unconditionally, so callers must pass a
// non-null instruction.
- static std::pair<MachineBasicBlock *, MachineBasicBlock::iterator>
127
- getInsertAfterPtrs (MachineInstr *MI) {
128
- MachineBasicBlock *InsertMBB = MI->getParent ();
129
- return {InsertMBB,
130
- InsertMBB->SkipPHIsAndLabels (std::next (MI->getIterator ()))};
131
- }
132
-
133
- // bb.previous
134
- // %PrevReg = ...
135
- //
136
- // bb.current
137
- // %CurReg = ...
138
- //
139
- // %DstReg - not defined
140
- //
141
- // -> (wave32 example, new registers have sreg_32 reg class and S1 LLT)
142
- //
143
- // bb.previous
144
- // %PrevReg = ...
145
- // %PrevRegCopy:sreg_32(s1) = COPY %PrevReg
146
- //
147
- // bb.current
148
- // %CurReg = ...
149
- // %CurRegCopy:sreg_32(s1) = COPY %CurReg
150
- // ...
151
- // %PrevMaskedReg:sreg_32(s1) = ANDN2 %PrevRegCopy, ExecReg - active lanes to 0
152
- // %CurMaskedReg:sreg_32(s1) = AND %ExecReg, CurRegCopy - inactive lanes to 0
153
- // %DstReg:sreg_32(s1) = OR %PrevMaskedReg, CurMaskedReg
154
- //
155
- // DstReg = for active lanes rewrite bit in PrevReg with bit from CurReg
156
// Emits the instruction sequence that defines DstReg from PrevReg and CurReg:
// a COPY of each input placed after that input's defining instruction, then
// AndN2Op / AndOp / OrOp instructions inserted into MBB before iterator I.
// All instructions are built with debug location DL.
// NOTE(review): MRI->getVRegDef() results are passed straight to
// getInsertAfterPtrs, so both inputs are assumed to have a defining
// instruction — confirm against callers.
- void DivergenceLoweringHelper::buildMergeLaneMasks (
157
- MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL,
158
- Register DstReg, Register PrevReg, Register CurReg) {
159
- // DstReg = (PrevReg & !EXEC) | (CurReg & EXEC)
160
- // TODO: check if inputs are constants or results of a compare.
161
-
162
// Copy PrevReg into a fresh lane-mask register right after its definition.
- Register PrevRegCopy = createLaneMaskReg (MRI, LaneMaskRegAttrs);
163
- auto [PrevMBB, AfterPrevReg] = getInsertAfterPtrs (MRI->getVRegDef (PrevReg));
164
- BuildMI (*PrevMBB, AfterPrevReg, DL, TII->get (AMDGPU::COPY), PrevRegCopy)
165
- .addReg (PrevReg);
// PrevMaskedReg = AndN2Op(PrevRegCopy, ExecReg), inserted at I in MBB.
- Register PrevMaskedReg = createLaneMaskReg (MRI, LaneMaskRegAttrs);
166
- BuildMI (MBB, I, DL, TII->get (AndN2Op), PrevMaskedReg)
167
- .addReg (PrevRegCopy)
168
- .addReg (ExecReg);
169
-
170
// Same pattern for CurReg: copy after its definition, then mask at I.
- Register CurRegCopy = createLaneMaskReg (MRI, LaneMaskRegAttrs);
171
- auto [CurMBB, AfterCurReg] = getInsertAfterPtrs (MRI->getVRegDef (CurReg));
172
- BuildMI (*CurMBB, AfterCurReg, DL, TII->get (AMDGPU::COPY), CurRegCopy)
173
- .addReg (CurReg);
174
// CurMaskedReg = AndOp(ExecReg, CurRegCopy), inserted at I in MBB.
- Register CurMaskedReg = createLaneMaskReg (MRI, LaneMaskRegAttrs);
175
- BuildMI (MBB, I, DL, TII->get (AndOp), CurMaskedReg)
176
- .addReg (ExecReg)
177
- .addReg (CurRegCopy);
178
-
179
// DstReg = OrOp(PrevMaskedReg, CurMaskedReg), inserted at I in MBB.
- BuildMI (MBB, I, DL, TII->get (OrOp), DstReg)
180
- .addReg (PrevMaskedReg)
181
- .addReg (CurMaskedReg);
182
- }
184
-
185
// No-op override: returns immediately without reading or modifying In.
- void DivergenceLoweringHelper::constrainAsLaneMask (Incoming &In) { return ; }
186
-
187
49
} // End anonymous namespace.
188
50
189
51
// Pass registration for AMDGPUGlobalISelDivergenceLowering under DEBUG_TYPE.
// The three INITIALIZE_PASS_DEPENDENCY lines carry the diff's '-' marker
// (removed side), leaving BEGIN/END with no dependencies listed between them.
INITIALIZE_PASS_BEGIN (AMDGPUGlobalISelDivergenceLowering, DEBUG_TYPE,
190
52
" AMDGPU GlobalISel divergence lowering" , false , false )
191
- INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
192
- INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
193
- INITIALIZE_PASS_DEPENDENCY(MachineUniformityAnalysisPass)
194
53
INITIALIZE_PASS_END(AMDGPUGlobalISelDivergenceLowering, DEBUG_TYPE,
195
54
" AMDGPU GlobalISel divergence lowering" , false , false )
196
55
@@ -205,14 +64,5 @@ FunctionPass *llvm::createAMDGPUGlobalISelDivergenceLoweringPass() {
205
64
206
65
// Pass entry point for one machine function.
// Removed side ('-'): fetched the dominator tree, post-dominator tree and
// uniformity info, built a DivergenceLoweringHelper, and returned whether
// Helper.lowerPhis() changed anything.
// Added side ('+'): returns false unconditionally, i.e. the function body no
// longer touches MF and always reports "not modified".
bool AMDGPUGlobalISelDivergenceLowering::runOnMachineFunction (
207
66
MachineFunction &MF) {
208
- MachineDominatorTree &DT = getAnalysis<MachineDominatorTree>();
209
- MachinePostDominatorTree &PDT = getAnalysis<MachinePostDominatorTree>();
210
- MachineUniformityInfo &MUI =
211
- getAnalysis<MachineUniformityAnalysisPass>().getUniformityInfo ();
212
-
213
- DivergenceLoweringHelper Helper (&MF, &DT, &PDT, &MUI);
214
-
215
- bool Changed = false ;
216
- Changed |= Helper.lowerPhis ();
217
- return Changed;
67
+ return false ;
218
68
}
0 commit comments