//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "SILowerI1Copies.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineUniformityAnalysis.h"
#include "llvm/InitializePasses.h"

#define DEBUG_TYPE "amdgpu-global-isel-divergence-lowering"
22
25
@@ -42,14 +45,152 @@ class AMDGPUGlobalISelDivergenceLowering : public MachineFunctionPass {
42
45
43
46
// Declare the analyses this pass consumes. The pass rewrites instructions
// in place and never alters block structure, so the CFG is preserved.
void getAnalysisUsage(AnalysisUsage &AU) const override {
  AU.setPreservesCFG();
  AU.addRequired<MachineDominatorTree>();
  AU.addRequired<MachinePostDominatorTree>();
  AU.addRequired<MachineUniformityAnalysisPass>();
  MachineFunctionPass::getAnalysisUsage(AU);
}
47
53
};
48
54
55
+ class DivergenceLoweringHelper : public PhiLoweringHelper {
56
+ public:
57
+ DivergenceLoweringHelper (MachineFunction *MF, MachineDominatorTree *DT,
58
+ MachinePostDominatorTree *PDT,
59
+ MachineUniformityInfo *MUI);
60
+
61
+ private:
62
+ MachineUniformityInfo *MUI = nullptr ;
63
+
64
+ public:
65
+ void markAsLaneMask (Register DstReg) const override ;
66
+ void getCandidatesForLowering (
67
+ SmallVectorImpl<MachineInstr *> &Vreg1Phis) const override ;
68
+ void collectIncomingValuesFromPhi (
69
+ const MachineInstr *MI,
70
+ SmallVectorImpl<Incoming> &Incomings) const override ;
71
+ void replaceDstReg (Register NewReg, Register OldReg,
72
+ MachineBasicBlock *MBB) override ;
73
+ void buildMergeLaneMasks (MachineBasicBlock &MBB,
74
+ MachineBasicBlock::iterator I, const DebugLoc &DL,
75
+ Register DstReg, Register PrevReg,
76
+ Register CurReg) override ;
77
+ void constrainAsLaneMask (Incoming &In) override ;
78
+ };
79
+
80
+ DivergenceLoweringHelper::DivergenceLoweringHelper (
81
+ MachineFunction *MF, MachineDominatorTree *DT,
82
+ MachinePostDominatorTree *PDT, MachineUniformityInfo *MUI)
83
+ : PhiLoweringHelper(MF, DT, PDT), MUI(MUI) {}
84
+
85
+ // _(s1) -> SReg_32/64(s1)
86
+ void DivergenceLoweringHelper::markAsLaneMask (Register DstReg) const {
87
+ assert (MRI->getType (DstReg) == LLT::scalar (1 ));
88
+
89
+ if (MRI->getRegClassOrNull (DstReg)) {
90
+ MRI->constrainRegClass (DstReg, ST->getBoolRC ());
91
+ return ;
92
+ }
93
+
94
+ MRI->setRegClass (DstReg, ST->getBoolRC ());
95
+ }
96
+
97
+ void DivergenceLoweringHelper::getCandidatesForLowering (
98
+ SmallVectorImpl<MachineInstr *> &Vreg1Phis) const {
99
+ LLT S1 = LLT::scalar (1 );
100
+
101
+ // Add divergent i1 phis to the list
102
+ for (MachineBasicBlock &MBB : *MF) {
103
+ for (MachineInstr &MI : MBB.phis ()) {
104
+ Register Dst = MI.getOperand (0 ).getReg ();
105
+ if (MRI->getType (Dst) == S1 && MUI->isDivergent (Dst))
106
+ Vreg1Phis.push_back (&MI);
107
+ }
108
+ }
109
+ }
110
+
111
+ void DivergenceLoweringHelper::collectIncomingValuesFromPhi (
112
+ const MachineInstr *MI, SmallVectorImpl<Incoming> &Incomings) const {
113
+ for (unsigned i = 1 ; i < MI->getNumOperands (); i += 2 ) {
114
+ Incomings.emplace_back (MI->getOperand (i).getReg (),
115
+ MI->getOperand (i + 1 ).getMBB (), Register ());
116
+ }
117
+ }
118
+
119
+ void DivergenceLoweringHelper::replaceDstReg (Register NewReg, Register OldReg,
120
+ MachineBasicBlock *MBB) {
121
+ BuildMI (*MBB, MBB->getFirstNonPHI (), {}, TII->get (AMDGPU::COPY), OldReg)
122
+ .addReg (NewReg);
123
+ }
124
+
125
+ // Get pointers to build instruction just after MI (skips phis if needed)
126
+ static std::pair<MachineBasicBlock *, MachineBasicBlock::iterator>
127
+ getInsertAfterPtrs (MachineInstr *MI) {
128
+ MachineBasicBlock *InsertMBB = MI->getParent ();
129
+ return {InsertMBB,
130
+ InsertMBB->SkipPHIsAndLabels (std::next (MI->getIterator ()))};
131
+ }
132
+
133
+ // bb.previous
134
+ // %PrevReg = ...
135
+ //
136
+ // bb.current
137
+ // %CurReg = ...
138
+ //
139
+ // %DstReg - not defined
140
+ //
141
+ // -> (wave32 example, new registers have sreg_32 reg class and S1 LLT)
142
+ //
143
+ // bb.previous
144
+ // %PrevReg = ...
145
+ // %PrevRegCopy:sreg_32(s1) = COPY %PrevReg
146
+ //
147
+ // bb.current
148
+ // %CurReg = ...
149
+ // %CurRegCopy:sreg_32(s1) = COPY %CurReg
150
+ // ...
151
+ // %PrevMaskedReg:sreg_32(s1) = ANDN2 %PrevRegCopy, ExecReg - active lanes 0
152
+ // %CurMaskedReg:sreg_32(s1) = AND %ExecReg, CurRegCopy - inactive lanes to 0
153
+ // %DstReg:sreg_32(s1) = OR %PrevMaskedReg, CurMaskedReg
154
+ //
155
+ // DstReg = for active lanes rewrite bit in PrevReg with bit from CurReg
156
+ void DivergenceLoweringHelper::buildMergeLaneMasks (
157
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL,
158
+ Register DstReg, Register PrevReg, Register CurReg) {
159
+ // DstReg = (PrevReg & !EXEC) | (CurReg & EXEC)
160
+ // TODO: check if inputs are constants or results of a compare.
161
+
162
+ Register PrevRegCopy = createLaneMaskReg (MRI, LaneMaskRegAttrs);
163
+ auto [PrevMBB, AfterPrevReg] = getInsertAfterPtrs (MRI->getVRegDef (PrevReg));
164
+ BuildMI (*PrevMBB, AfterPrevReg, DL, TII->get (AMDGPU::COPY), PrevRegCopy)
165
+ .addReg (PrevReg);
166
+ Register PrevMaskedReg = createLaneMaskReg (MRI, LaneMaskRegAttrs);
167
+ BuildMI (MBB, I, DL, TII->get (AndN2Op), PrevMaskedReg)
168
+ .addReg (PrevRegCopy)
169
+ .addReg (ExecReg);
170
+
171
+ Register CurRegCopy = createLaneMaskReg (MRI, LaneMaskRegAttrs);
172
+ auto [CurMBB, AfterCurReg] = getInsertAfterPtrs (MRI->getVRegDef (CurReg));
173
+ BuildMI (*CurMBB, AfterCurReg, DL, TII->get (AMDGPU::COPY), CurRegCopy)
174
+ .addReg (CurReg);
175
+ Register CurMaskedReg = createLaneMaskReg (MRI, LaneMaskRegAttrs);
176
+ BuildMI (MBB, I, DL, TII->get (AndOp), CurMaskedReg)
177
+ .addReg (ExecReg)
178
+ .addReg (CurRegCopy);
179
+
180
+ BuildMI (MBB, I, DL, TII->get (OrOp), DstReg)
181
+ .addReg (PrevMaskedReg)
182
+ .addReg (CurMaskedReg);
183
+ }
184
+
185
+ void DivergenceLoweringHelper::constrainAsLaneMask (Incoming &In) { return ; }
186
+
49
187
} // End anonymous namespace.
50
188
51
189
// Register the pass and its analysis dependencies with the legacy pass
// manager. The pass-name string must be identical in BEGIN and END.
INITIALIZE_PASS_BEGIN(AMDGPUGlobalISelDivergenceLowering, DEBUG_TYPE,
                      "AMDGPU GlobalISel divergence lowering", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachineUniformityAnalysisPass)
INITIALIZE_PASS_END(AMDGPUGlobalISelDivergenceLowering, DEBUG_TYPE,
                    "AMDGPU GlobalISel divergence lowering", false, false)
55
196
@@ -64,5 +205,14 @@ FunctionPass *llvm::createAMDGPUGlobalISelDivergenceLoweringPass() {
64
205
65
206
bool AMDGPUGlobalISelDivergenceLowering::runOnMachineFunction (
66
207
MachineFunction &MF) {
67
- return false ;
208
+ MachineDominatorTree &DT = getAnalysis<MachineDominatorTree>();
209
+ MachinePostDominatorTree &PDT = getAnalysis<MachinePostDominatorTree>();
210
+ MachineUniformityInfo &MUI =
211
+ getAnalysis<MachineUniformityAnalysisPass>().getUniformityInfo ();
212
+
213
+ DivergenceLoweringHelper Helper (&MF, &DT, &PDT, &MUI);
214
+
215
+ bool Changed = false ;
216
+ Changed |= Helper.lowerPhis ();
217
+ return Changed;
68
218
}
0 commit comments