Skip to content

Commit 90bdf76

Browse files
Revert "AMDGPU/GlobalISelDivergenceLowering: select divergent i1 phis" (#78468)
Reverts #76145
1 parent: 5ddd7bc · commit: 90bdf76

17 files changed

+234
-770
lines changed

llvm/include/llvm/CodeGen/MachineUniformityAnalysis.h

Lines changed: 0 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -32,25 +32,6 @@ MachineUniformityInfo computeMachineUniformityInfo(
3232
MachineFunction &F, const MachineCycleInfo &cycleInfo,
3333
const MachineDomTree &domTree, bool HasBranchDivergence);
3434

35-
/// Legacy analysis pass which computes a \ref MachineUniformityInfo.
36-
class MachineUniformityAnalysisPass : public MachineFunctionPass {
37-
MachineUniformityInfo UI;
38-
39-
public:
40-
static char ID;
41-
42-
MachineUniformityAnalysisPass();
43-
44-
MachineUniformityInfo &getUniformityInfo() { return UI; }
45-
const MachineUniformityInfo &getUniformityInfo() const { return UI; }
46-
47-
bool runOnMachineFunction(MachineFunction &F) override;
48-
void getAnalysisUsage(AnalysisUsage &AU) const override;
49-
void print(raw_ostream &OS, const Module *M = nullptr) const override;
50-
51-
// TODO: verify analysis
52-
};
53-
5435
} // namespace llvm
5536

5637
#endif // LLVM_CODEGEN_MACHINEUNIFORMITYANALYSIS_H

llvm/lib/CodeGen/MachineUniformityAnalysis.cpp

Lines changed: 19 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -165,6 +165,25 @@ MachineUniformityInfo llvm::computeMachineUniformityInfo(
165165

166166
namespace {
167167

168+
/// Legacy analysis pass which computes a \ref MachineUniformityInfo.
169+
class MachineUniformityAnalysisPass : public MachineFunctionPass {
170+
MachineUniformityInfo UI;
171+
172+
public:
173+
static char ID;
174+
175+
MachineUniformityAnalysisPass();
176+
177+
MachineUniformityInfo &getUniformityInfo() { return UI; }
178+
const MachineUniformityInfo &getUniformityInfo() const { return UI; }
179+
180+
bool runOnMachineFunction(MachineFunction &F) override;
181+
void getAnalysisUsage(AnalysisUsage &AU) const override;
182+
void print(raw_ostream &OS, const Module *M = nullptr) const override;
183+
184+
// TODO: verify analysis
185+
};
186+
168187
class MachineUniformityInfoPrinterPass : public MachineFunctionPass {
169188
public:
170189
static char ID;

llvm/lib/Target/AMDGPU/AMDGPUGlobalISelDivergenceLowering.cpp

Lines changed: 1 addition & 151 deletions
Original file line number | Diff line number | Diff line change
@@ -16,10 +16,7 @@
1616
//===----------------------------------------------------------------------===//
1717

1818
#include "AMDGPU.h"
19-
#include "SILowerI1Copies.h"
2019
#include "llvm/CodeGen/MachineFunctionPass.h"
21-
#include "llvm/CodeGen/MachineUniformityAnalysis.h"
22-
#include "llvm/InitializePasses.h"
2320

2421
#define DEBUG_TYPE "amdgpu-global-isel-divergence-lowering"
2522

@@ -45,152 +42,14 @@ class AMDGPUGlobalISelDivergenceLowering : public MachineFunctionPass {
4542

4643
void getAnalysisUsage(AnalysisUsage &AU) const override {
4744
AU.setPreservesCFG();
48-
AU.addRequired<MachineDominatorTree>();
49-
AU.addRequired<MachinePostDominatorTree>();
50-
AU.addRequired<MachineUniformityAnalysisPass>();
5145
MachineFunctionPass::getAnalysisUsage(AU);
5246
}
5347
};
5448

55-
class DivergenceLoweringHelper : public PhiLoweringHelper {
56-
public:
57-
DivergenceLoweringHelper(MachineFunction *MF, MachineDominatorTree *DT,
58-
MachinePostDominatorTree *PDT,
59-
MachineUniformityInfo *MUI);
60-
61-
private:
62-
MachineUniformityInfo *MUI = nullptr;
63-
64-
public:
65-
void markAsLaneMask(Register DstReg) const override;
66-
void getCandidatesForLowering(
67-
SmallVectorImpl<MachineInstr *> &Vreg1Phis) const override;
68-
void collectIncomingValuesFromPhi(
69-
const MachineInstr *MI,
70-
SmallVectorImpl<Incoming> &Incomings) const override;
71-
void replaceDstReg(Register NewReg, Register OldReg,
72-
MachineBasicBlock *MBB) override;
73-
void buildMergeLaneMasks(MachineBasicBlock &MBB,
74-
MachineBasicBlock::iterator I, const DebugLoc &DL,
75-
Register DstReg, Register PrevReg,
76-
Register CurReg) override;
77-
void constrainAsLaneMask(Incoming &In) override;
78-
};
79-
80-
DivergenceLoweringHelper::DivergenceLoweringHelper(
81-
MachineFunction *MF, MachineDominatorTree *DT,
82-
MachinePostDominatorTree *PDT, MachineUniformityInfo *MUI)
83-
: PhiLoweringHelper(MF, DT, PDT), MUI(MUI) {}
84-
85-
// _(s1) -> SReg_32/64(s1)
86-
void DivergenceLoweringHelper::markAsLaneMask(Register DstReg) const {
87-
assert(MRI->getType(DstReg) == LLT::scalar(1));
88-
89-
if (MRI->getRegClassOrNull(DstReg)) {
90-
MRI->constrainRegClass(DstReg, ST->getBoolRC());
91-
return;
92-
}
93-
94-
MRI->setRegClass(DstReg, ST->getBoolRC());
95-
}
96-
97-
void DivergenceLoweringHelper::getCandidatesForLowering(
98-
SmallVectorImpl<MachineInstr *> &Vreg1Phis) const {
99-
LLT S1 = LLT::scalar(1);
100-
101-
// Add divergent i1 phis to the list
102-
for (MachineBasicBlock &MBB : *MF) {
103-
for (MachineInstr &MI : MBB.phis()) {
104-
Register Dst = MI.getOperand(0).getReg();
105-
if (MRI->getType(Dst) == S1 && MUI->isDivergent(Dst))
106-
Vreg1Phis.push_back(&MI);
107-
}
108-
}
109-
}
110-
111-
void DivergenceLoweringHelper::collectIncomingValuesFromPhi(
112-
const MachineInstr *MI, SmallVectorImpl<Incoming> &Incomings) const {
113-
for (unsigned i = 1; i < MI->getNumOperands(); i += 2) {
114-
Incomings.emplace_back(MI->getOperand(i).getReg(),
115-
MI->getOperand(i + 1).getMBB(), Register());
116-
}
117-
}
118-
119-
void DivergenceLoweringHelper::replaceDstReg(Register NewReg, Register OldReg,
120-
MachineBasicBlock *MBB) {
121-
BuildMI(*MBB, MBB->getFirstNonPHI(), {}, TII->get(AMDGPU::COPY), OldReg)
122-
.addReg(NewReg);
123-
}
124-
125-
// Get pointers to build instruction just after MI (skips phis if needed)
126-
static std::pair<MachineBasicBlock *, MachineBasicBlock::iterator>
127-
getInsertAfterPtrs(MachineInstr *MI) {
128-
MachineBasicBlock *InsertMBB = MI->getParent();
129-
return {InsertMBB,
130-
InsertMBB->SkipPHIsAndLabels(std::next(MI->getIterator()))};
131-
}
132-
133-
// bb.previous
134-
// %PrevReg = ...
135-
//
136-
// bb.current
137-
// %CurReg = ...
138-
//
139-
// %DstReg - not defined
140-
//
141-
// -> (wave32 example, new registers have sreg_32 reg class and S1 LLT)
142-
//
143-
// bb.previous
144-
// %PrevReg = ...
145-
// %PrevRegCopy:sreg_32(s1) = COPY %PrevReg
146-
//
147-
// bb.current
148-
// %CurReg = ...
149-
// %CurRegCopy:sreg_32(s1) = COPY %CurReg
150-
// ...
151-
// %PrevMaskedReg:sreg_32(s1) = ANDN2 %PrevRegCopy, ExecReg - active lanes 0
152-
// %CurMaskedReg:sreg_32(s1) = AND %ExecReg, CurRegCopy - inactive lanes to 0
153-
// %DstReg:sreg_32(s1) = OR %PrevMaskedReg, CurMaskedReg
154-
//
155-
// DstReg = for active lanes rewrite bit in PrevReg with bit from CurReg
156-
void DivergenceLoweringHelper::buildMergeLaneMasks(
157-
MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL,
158-
Register DstReg, Register PrevReg, Register CurReg) {
159-
// DstReg = (PrevReg & !EXEC) | (CurReg & EXEC)
160-
// TODO: check if inputs are constants or results of a compare.
161-
162-
Register PrevRegCopy = createLaneMaskReg(MRI, LaneMaskRegAttrs);
163-
auto [PrevMBB, AfterPrevReg] = getInsertAfterPtrs(MRI->getVRegDef(PrevReg));
164-
BuildMI(*PrevMBB, AfterPrevReg, DL, TII->get(AMDGPU::COPY), PrevRegCopy)
165-
.addReg(PrevReg);
166-
Register PrevMaskedReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
167-
BuildMI(MBB, I, DL, TII->get(AndN2Op), PrevMaskedReg)
168-
.addReg(PrevRegCopy)
169-
.addReg(ExecReg);
170-
171-
Register CurRegCopy = createLaneMaskReg(MRI, LaneMaskRegAttrs);
172-
auto [CurMBB, AfterCurReg] = getInsertAfterPtrs(MRI->getVRegDef(CurReg));
173-
BuildMI(*CurMBB, AfterCurReg, DL, TII->get(AMDGPU::COPY), CurRegCopy)
174-
.addReg(CurReg);
175-
Register CurMaskedReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
176-
BuildMI(MBB, I, DL, TII->get(AndOp), CurMaskedReg)
177-
.addReg(ExecReg)
178-
.addReg(CurRegCopy);
179-
180-
BuildMI(MBB, I, DL, TII->get(OrOp), DstReg)
181-
.addReg(PrevMaskedReg)
182-
.addReg(CurMaskedReg);
183-
}
184-
185-
void DivergenceLoweringHelper::constrainAsLaneMask(Incoming &In) { return; }
186-
18749
} // End anonymous namespace.
18850

18951
INITIALIZE_PASS_BEGIN(AMDGPUGlobalISelDivergenceLowering, DEBUG_TYPE,
19052
"AMDGPU GlobalISel divergence lowering", false, false)
191-
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
192-
INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
193-
INITIALIZE_PASS_DEPENDENCY(MachineUniformityAnalysisPass)
19453
INITIALIZE_PASS_END(AMDGPUGlobalISelDivergenceLowering, DEBUG_TYPE,
19554
"AMDGPU GlobalISel divergence lowering", false, false)
19655

@@ -205,14 +64,5 @@ FunctionPass *llvm::createAMDGPUGlobalISelDivergenceLoweringPass() {
20564

20665
bool AMDGPUGlobalISelDivergenceLowering::runOnMachineFunction(
20766
MachineFunction &MF) {
208-
MachineDominatorTree &DT = getAnalysis<MachineDominatorTree>();
209-
MachinePostDominatorTree &PDT = getAnalysis<MachinePostDominatorTree>();
210-
MachineUniformityInfo &MUI =
211-
getAnalysis<MachineUniformityAnalysisPass>().getUniformityInfo();
212-
213-
DivergenceLoweringHelper Helper(&MF, &DT, &PDT, &MUI);
214-
215-
bool Changed = false;
216-
Changed |= Helper.lowerPhis();
217-
return Changed;
67+
return false;
21868
}

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

Lines changed: 0 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -210,14 +210,6 @@ bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
210210
bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const {
211211
const Register DefReg = I.getOperand(0).getReg();
212212
const LLT DefTy = MRI->getType(DefReg);
213-
// Lane mask PHIs, PHI where all register operands have sgpr register class
214-
// with S1 LLT, are already selected in divergence lowering pass.
215-
if (I.getOpcode() == AMDGPU::PHI) {
216-
assert(MRI->getType(DefReg) == LLT::scalar(1));
217-
assert(TRI.isSGPRClass(MRI->getRegClass(DefReg)));
218-
return true;
219-
}
220-
221213
if (DefTy == LLT::scalar(1)) {
222214
if (!AllowRiskySelect) {
223215
LLVM_DEBUG(dbgs() << "Skipping risky boolean phi\n");

llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -78,7 +78,7 @@ class Vreg1LoweringHelper : public PhiLoweringHelper {
7878
MachineBasicBlock::iterator I, const DebugLoc &DL,
7979
Register DstReg, Register PrevReg,
8080
Register CurReg) override;
81-
void constrainAsLaneMask(Incoming &In) override;
81+
void constrainIncomingRegisterTakenAsIs(Incoming &In) override;
8282

8383
bool lowerCopiesFromI1();
8484
bool lowerCopiesToI1();
@@ -619,7 +619,7 @@ bool PhiLoweringHelper::lowerPhis() {
619619
for (auto &Incoming : Incomings) {
620620
MachineBasicBlock &IMBB = *Incoming.Block;
621621
if (PIA.isSource(IMBB)) {
622-
constrainAsLaneMask(Incoming);
622+
constrainIncomingRegisterTakenAsIs(Incoming);
623623
SSAUpdater.AddAvailableValue(&IMBB, Incoming.Reg);
624624
} else {
625625
Incoming.UpdatedReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
@@ -911,4 +911,6 @@ void Vreg1LoweringHelper::buildMergeLaneMasks(MachineBasicBlock &MBB,
911911
}
912912
}
913913

914-
void Vreg1LoweringHelper::constrainAsLaneMask(Incoming &In) { return; }
914+
void Vreg1LoweringHelper::constrainIncomingRegisterTakenAsIs(Incoming &In) {
915+
return;
916+
}

llvm/lib/Target/AMDGPU/SILowerI1Copies.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -91,7 +91,7 @@ class PhiLoweringHelper {
9191
MachineBasicBlock::iterator I,
9292
const DebugLoc &DL, Register DstReg,
9393
Register PrevReg, Register CurReg) = 0;
94-
virtual void constrainAsLaneMask(Incoming &In) = 0;
94+
virtual void constrainIncomingRegisterTakenAsIs(Incoming &In) = 0;
9595
};
9696

9797
} // end namespace llvm

llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
22
; RUN: llc -global-isel -amdgpu-global-isel-risky-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
3-
; REQUIRES: do-not-run-me
43

54
; Divergent phis that don't require lowering using lane mask merging
65

0 commit comments

Comments
 (0)