Skip to content

Revert "AMDGPU/GlobalISelDivergenceLowering: select divergent i1 phis" #79274

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 0 additions & 11 deletions llvm/include/llvm/CodeGen/MachineRegisterInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -752,17 +752,6 @@ class MachineRegisterInfo {
Register createVirtualRegister(const TargetRegisterClass *RegClass,
StringRef Name = "");

/// All available attributes a virtual register can have.
struct RegisterAttributes {
/// Register class or register bank of the register. Held by pointer;
/// createVirtualRegister(RegisterAttributes, StringRef) dereferences it
/// unconditionally, so it must point at a valid object there.
const RegClassOrRegBank *RCOrRB;
/// Low-level type that is set on the register.
LLT Ty;
};

/// createVirtualRegister - Create and return a new virtual register in the
/// function with the specified register attributes.
Register createVirtualRegister(RegisterAttributes RegAttr,
StringRef Name = "");

/// Create and return a new virtual register in the function with the same
/// attributes as the given register.
Register cloneVirtualRegister(Register VReg, StringRef Name = "");
Expand Down
19 changes: 0 additions & 19 deletions llvm/include/llvm/CodeGen/MachineUniformityAnalysis.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,25 +32,6 @@ MachineUniformityInfo computeMachineUniformityInfo(
MachineFunction &F, const MachineCycleInfo &cycleInfo,
const MachineDomTree &domTree, bool HasBranchDivergence);

/// Legacy analysis pass which computes a \ref MachineUniformityInfo.
///
/// The result is stored in the pass object and handed out by reference through
/// getUniformityInfo().
class MachineUniformityAnalysisPass : public MachineFunctionPass {
// Uniformity result stored in the pass and returned by getUniformityInfo().
MachineUniformityInfo UI;

public:
// Pass identification member; only declared here.
static char ID;

MachineUniformityAnalysisPass();

// Mutable and read-only access to the stored uniformity info.
MachineUniformityInfo &getUniformityInfo() { return UI; }
const MachineUniformityInfo &getUniformityInfo() const { return UI; }

// MachineFunctionPass overrides; only declared here.
bool runOnMachineFunction(MachineFunction &F) override;
void getAnalysisUsage(AnalysisUsage &AU) const override;
void print(raw_ostream &OS, const Module *M = nullptr) const override;

// TODO: verify analysis
};

} // namespace llvm

#endif // LLVM_CODEGEN_MACHINEUNIFORMITYANALYSIS_H
11 changes: 0 additions & 11 deletions llvm/lib/CodeGen/MachineRegisterInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -167,17 +167,6 @@ MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass,
return Reg;
}

/// Create a new virtual register in the function and return it. Its register
/// class or bank and its low-level type are both taken from \p RegAttr.
Register MachineRegisterInfo::createVirtualRegister(RegisterAttributes RegAttr,
                                                    StringRef Name) {
  const Register NewVReg = createIncompleteVirtualRegister(Name);

  // RCOrRB is held by pointer and is dereferenced here without a null check.
  VRegInfo[NewVReg].first = *RegAttr.RCOrRB;
  setType(NewVReg, RegAttr.Ty);

  // The register is announced only after class/bank and type have been set.
  noteNewVirtualRegister(NewVReg);
  return NewVReg;
}

Register MachineRegisterInfo::cloneVirtualRegister(Register VReg,
StringRef Name) {
Register Reg = createIncompleteVirtualRegister(Name);
Expand Down
19 changes: 19 additions & 0 deletions llvm/lib/CodeGen/MachineUniformityAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,25 @@ MachineUniformityInfo llvm::computeMachineUniformityInfo(

namespace {

/// Legacy analysis pass which computes a \ref MachineUniformityInfo.
///
/// The result is stored in the pass object and handed out by reference through
/// getUniformityInfo().
class MachineUniformityAnalysisPass : public MachineFunctionPass {
// Uniformity result stored in the pass and returned by getUniformityInfo().
MachineUniformityInfo UI;

public:
// Pass identification member; only declared here.
static char ID;

MachineUniformityAnalysisPass();

// Mutable and read-only access to the stored uniformity info.
MachineUniformityInfo &getUniformityInfo() { return UI; }
const MachineUniformityInfo &getUniformityInfo() const { return UI; }

// MachineFunctionPass overrides; only declared here.
bool runOnMachineFunction(MachineFunction &F) override;
void getAnalysisUsage(AnalysisUsage &AU) const override;
void print(raw_ostream &OS, const Module *M = nullptr) const override;

// TODO: verify analysis
};

class MachineUniformityInfoPrinterPass : public MachineFunctionPass {
public:
static char ID;
Expand Down
145 changes: 1 addition & 144 deletions llvm/lib/Target/AMDGPU/AMDGPUGlobalISelDivergenceLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,7 @@
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "SILowerI1Copies.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineUniformityAnalysis.h"
#include "llvm/InitializePasses.h"

#define DEBUG_TYPE "amdgpu-global-isel-divergence-lowering"

Expand All @@ -46,146 +42,14 @@ class AMDGPUGlobalISelDivergenceLowering : public MachineFunctionPass {

// Records on \p AU what this pass preserves and which analyses it requires,
// then forwards to the MachineFunctionPass implementation.
void getAnalysisUsage(AnalysisUsage &AU) const override {
// The pass declares that it preserves the CFG.
AU.setPreservesCFG();
// Required analyses: dominator tree, post-dominator tree and machine
// uniformity analysis.
AU.addRequired<MachineDominatorTree>();
AU.addRequired<MachinePostDominatorTree>();
AU.addRequired<MachineUniformityAnalysisPass>();
MachineFunctionPass::getAnalysisUsage(AU);
}
};

// PhiLoweringHelper implementation used by the GlobalISel divergence lowering
// pass. It selects s1 phis that the uniformity info reports as divergent and
// provides the hooks the base class needs to build lane masks for them.
class DivergenceLoweringHelper : public PhiLoweringHelper {
public:
// MF, DT and PDT are forwarded to PhiLoweringHelper; MUI is kept for
// divergence queries.
DivergenceLoweringHelper(MachineFunction *MF, MachineDominatorTree *DT,
MachinePostDominatorTree *PDT,
MachineUniformityInfo *MUI);

private:
// Uniformity info queried in getCandidatesForLowering.
MachineUniformityInfo *MUI = nullptr;
// Builder used to insert the copies and lane mask merge instructions.
MachineIRBuilder B;
// Copies Reg into a newly created lane mask register and returns the copy.
Register buildRegCopyToLaneMask(Register Reg);

public:
// Overrides of the PhiLoweringHelper hooks.
void markAsLaneMask(Register DstReg) const override;
void getCandidatesForLowering(
SmallVectorImpl<MachineInstr *> &Vreg1Phis) const override;
void collectIncomingValuesFromPhi(
const MachineInstr *MI,
SmallVectorImpl<Incoming> &Incomings) const override;
void replaceDstReg(Register NewReg, Register OldReg,
MachineBasicBlock *MBB) override;
void buildMergeLaneMasks(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, const DebugLoc &DL,
Register DstReg, Register PrevReg,
Register CurReg) override;
void constrainAsLaneMask(Incoming &In) override;
};

// Forwards MF, DT and PDT to the PhiLoweringHelper base, stores the uniformity
// info pointer and constructs the instruction builder from the function.
// MF is dereferenced here, so it must not be null.
DivergenceLoweringHelper::DivergenceLoweringHelper(
MachineFunction *MF, MachineDominatorTree *DT,
MachinePostDominatorTree *PDT, MachineUniformityInfo *MUI)
: PhiLoweringHelper(MF, DT, PDT), MUI(MUI), B(*MF) {}

// Turn an s1 register into a lane mask: _(s1) -> SReg_32/64(s1).
// A register without a register class gets the subtarget's bool register
// class; one that already has a class is constrained to it instead.
void DivergenceLoweringHelper::markAsLaneMask(Register DstReg) const {
  assert(MRI->getType(DstReg) == LLT::scalar(1));

  // No register class yet: assign the lane mask class directly.
  if (!MRI->getRegClassOrNull(DstReg)) {
    MRI->setRegClass(DstReg, ST->getBoolRC());
    return;
  }

  // A class is already present, so constraining it has to succeed.
  if (!MRI->constrainRegClass(DstReg, ST->getBoolRC()))
    llvm_unreachable("Failed to constrain register class");
}

// Appends to Vreg1Phis every phi in the function whose result has type s1 and
// is reported as divergent by the uniformity info.
void DivergenceLoweringHelper::getCandidatesForLowering(
    SmallVectorImpl<MachineInstr *> &Vreg1Phis) const {
  const LLT BoolTy = LLT::scalar(1);

  for (MachineBasicBlock &Block : *MF) {
    for (MachineInstr &Phi : Block.phis()) {
      const Register PhiDst = Phi.getOperand(0).getReg();

      // The type is checked first; divergence is only queried for s1 results.
      const bool IsS1 = MRI->getType(PhiDst) == BoolTy;
      if (!IsS1 || !MUI->isDivergent(PhiDst))
        continue;

      Vreg1Phis.push_back(&Phi);
    }
  }
}

void DivergenceLoweringHelper::collectIncomingValuesFromPhi(
const MachineInstr *MI, SmallVectorImpl<Incoming> &Incomings) const {
for (unsigned i = 1; i < MI->getNumOperands(); i += 2) {
Incomings.emplace_back(MI->getOperand(i).getReg(),
MI->getOperand(i + 1).getMBB(), Register());
}
}

// Defines OldReg as a COPY of NewReg, inserted at the first non-phi position
// of MBB.
void DivergenceLoweringHelper::replaceDstReg(Register NewReg, Register OldReg,
                                             MachineBasicBlock *MBB) {
  MachineBasicBlock::iterator InsertPt = MBB->getFirstNonPHI();
  const auto &CopyDesc = TII->get(AMDGPU::COPY);

  // The copy is built with an empty debug location.
  BuildMI(*MBB, InsertPt, {}, CopyDesc, OldReg).addReg(NewReg);
}

// Creates a new lane mask register and copies Reg into it. The copy is placed
// after the instruction that defines Reg, moved past any phis and labels that
// follow that instruction. Returns the new lane mask register.
Register DivergenceLoweringHelper::buildRegCopyToLaneMask(Register Reg) {
  const Register MaskCopy = createLaneMaskReg(MRI, LaneMaskRegAttrs);

  MachineInstr *DefMI = MRI->getVRegDef(Reg);
  MachineBasicBlock *DefMBB = DefMI->getParent();

  // Start right behind the def, then skip phis/labels to find the insert point.
  auto AfterDef = std::next(DefMI->getIterator());
  B.setInsertPt(*DefMBB, DefMBB->SkipPHIsAndLabels(AfterDef));
  B.buildCopy(MaskCopy, Reg);

  return MaskCopy;
}

// Merges two lane masks: for active lanes the bit in PrevReg is overwritten
// with the bit from CurReg, inactive lanes keep their PrevReg bit.
//
//   DstReg = (PrevReg & !EXEC) | (CurReg & EXEC)
//
// Input (%DstReg is not defined yet):
//
//   bb.previous
//     %PrevReg = ...
//
//   bb.current
//     %CurReg = ...
//
// Output (wave32 example, new registers have sreg_32 reg class and S1 LLT):
//
//   bb.previous
//     %PrevReg = ...
//     %PrevLaneMask:sreg_32(s1) = COPY %PrevReg
//
//   bb.current
//     %CurReg = ...
//     %CurLaneMask:sreg_32(s1) = COPY %CurReg
//     ...
//     %PrevInactive:sreg_32(s1) = ANDN2 %PrevLaneMask, ExecReg - active lanes 0
//     %CurActive:sreg_32(s1) = AND %ExecReg, CurLaneMask - inactive lanes to 0
//     %DstReg:sreg_32(s1) = OR %PrevInactive, CurActive
void DivergenceLoweringHelper::buildMergeLaneMasks(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL,
    Register DstReg, Register PrevReg, Register CurReg) {
  // TODO: check if inputs are constants or results of a compare.

  // Lane mask copies of both inputs, each placed after its defining
  // instruction.
  const Register PrevLaneMask = buildRegCopyToLaneMask(PrevReg);
  const Register CurLaneMask = buildRegCopyToLaneMask(CurReg);

  // Temporaries for the two masked halves.
  const Register PrevInactive = createLaneMaskReg(MRI, LaneMaskRegAttrs);
  const Register CurActive = createLaneMaskReg(MRI, LaneMaskRegAttrs);

  // The merge itself is emitted at the requested position in MBB.
  B.setInsertPt(MBB, I);
  B.buildInstr(AndN2Op, {PrevInactive}, {PrevLaneMask, ExecReg});
  B.buildInstr(AndOp, {CurActive}, {ExecReg, CurLaneMask});
  B.buildInstr(OrOp, {DstReg}, {PrevInactive, CurActive});
}

// Intentionally a no-op: the incoming value is left untouched.
void DivergenceLoweringHelper::constrainAsLaneMask(Incoming &In) {}

} // End anonymous namespace.

// Pass initialization for AMDGPUGlobalISelDivergenceLowering. The dependency
// entries name the same three analyses that getAnalysisUsage requests via
// addRequired.
INITIALIZE_PASS_BEGIN(AMDGPUGlobalISelDivergenceLowering, DEBUG_TYPE,
"AMDGPU GlobalISel divergence lowering", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachineUniformityAnalysisPass)
INITIALIZE_PASS_END(AMDGPUGlobalISelDivergenceLowering, DEBUG_TYPE,
"AMDGPU GlobalISel divergence lowering", false, false)

Expand All @@ -200,12 +64,5 @@ FunctionPass *llvm::createAMDGPUGlobalISelDivergenceLoweringPass() {

// Pass entry point: fetches the dominator tree, post-dominator tree and
// uniformity info, builds a DivergenceLoweringHelper from them and returns the
// result of its lowerPhis() call.
// NOTE(review): this text looks like a diff rendering in which removed and
// added lines are interleaved; as written, the trailing `return false;` can
// never be reached. Confirm against the real file which variant is current.
bool AMDGPUGlobalISelDivergenceLowering::runOnMachineFunction(
MachineFunction &MF) {
MachineDominatorTree &DT = getAnalysis<MachineDominatorTree>();
MachinePostDominatorTree &PDT = getAnalysis<MachinePostDominatorTree>();
MachineUniformityInfo &MUI =
getAnalysis<MachineUniformityAnalysisPass>().getUniformityInfo();

DivergenceLoweringHelper Helper(&MF, &DT, &PDT, &MUI);

return Helper.lowerPhis();
return false;
}
5 changes: 2 additions & 3 deletions llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,6 @@ bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const {
const Register DefReg = I.getOperand(0).getReg();
const LLT DefTy = MRI->getType(DefReg);

if (DefTy == LLT::scalar(1)) {
if (!AllowRiskySelect) {
LLVM_DEBUG(dbgs() << "Skipping risky boolean phi\n");
Expand Down Expand Up @@ -3553,6 +3552,8 @@ bool AMDGPUInstructionSelector::selectStackRestore(MachineInstr &MI) const {
}

bool AMDGPUInstructionSelector::select(MachineInstr &I) {
if (I.isPHI())
return selectPHI(I);

if (!I.isPreISelOpcode()) {
if (I.isCopy())
Expand Down Expand Up @@ -3695,8 +3696,6 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) {
return selectWaveAddress(I);
case AMDGPU::G_STACKRESTORE:
return selectStackRestore(I);
case AMDGPU::G_PHI:
return selectPHI(I);
default:
return selectImpl(I, *CoverageInfo);
}
Expand Down
30 changes: 15 additions & 15 deletions llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,9 @@

using namespace llvm;

static Register
insertUndefLaneMask(MachineBasicBlock *MBB, MachineRegisterInfo *MRI,
MachineRegisterInfo::RegisterAttributes LaneMaskRegAttrs);
static Register insertUndefLaneMask(MachineBasicBlock *MBB,
MachineRegisterInfo *MRI,
Register LaneMaskRegAttrs);

namespace {

Expand Down Expand Up @@ -78,7 +78,7 @@ class Vreg1LoweringHelper : public PhiLoweringHelper {
MachineBasicBlock::iterator I, const DebugLoc &DL,
Register DstReg, Register PrevReg,
Register CurReg) override;
void constrainAsLaneMask(Incoming &In) override;
void constrainIncomingRegisterTakenAsIs(Incoming &In) override;

bool lowerCopiesFromI1();
bool lowerCopiesToI1();
Expand Down Expand Up @@ -304,8 +304,7 @@ class LoopFinder {
/// blocks, so that the SSA updater doesn't have to search all the way to the
/// function entry.
void addLoopEntries(unsigned LoopLevel, MachineSSAUpdater &SSAUpdater,
MachineRegisterInfo &MRI,
MachineRegisterInfo::RegisterAttributes LaneMaskRegAttrs,
MachineRegisterInfo &MRI, Register LaneMaskRegAttrs,
ArrayRef<Incoming> Incomings = {}) {
assert(LoopLevel < CommonDominators.size());

Expand Down Expand Up @@ -412,15 +411,14 @@ FunctionPass *llvm::createSILowerI1CopiesPass() {
return new SILowerI1Copies();
}

Register llvm::createLaneMaskReg(
MachineRegisterInfo *MRI,
MachineRegisterInfo::RegisterAttributes LaneMaskRegAttrs) {
return MRI->createVirtualRegister(LaneMaskRegAttrs);
Register llvm::createLaneMaskReg(MachineRegisterInfo *MRI,
Register LaneMaskRegAttrs) {
return MRI->cloneVirtualRegister(LaneMaskRegAttrs);
}

static Register
insertUndefLaneMask(MachineBasicBlock *MBB, MachineRegisterInfo *MRI,
MachineRegisterInfo::RegisterAttributes LaneMaskRegAttrs) {
static Register insertUndefLaneMask(MachineBasicBlock *MBB,
MachineRegisterInfo *MRI,
Register LaneMaskRegAttrs) {
MachineFunction &MF = *MBB->getParent();
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const SIInstrInfo *TII = ST.getInstrInfo();
Expand Down Expand Up @@ -621,7 +619,7 @@ bool PhiLoweringHelper::lowerPhis() {
for (auto &Incoming : Incomings) {
MachineBasicBlock &IMBB = *Incoming.Block;
if (PIA.isSource(IMBB)) {
constrainAsLaneMask(Incoming);
constrainIncomingRegisterTakenAsIs(Incoming);
SSAUpdater.AddAvailableValue(&IMBB, Incoming.Reg);
} else {
Incoming.UpdatedReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
Expand Down Expand Up @@ -913,4 +911,6 @@ void Vreg1LoweringHelper::buildMergeLaneMasks(MachineBasicBlock &MBB,
}
}

// Intentionally empty: this helper leaves the incoming value unchanged.
void Vreg1LoweringHelper::constrainAsLaneMask(Incoming &In) {}
// Intentionally a no-op: the incoming value is left untouched.
void Vreg1LoweringHelper::constrainIncomingRegisterTakenAsIs(Incoming &In) {}
11 changes: 4 additions & 7 deletions llvm/lib/Target/AMDGPU/SILowerI1Copies.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,7 @@ struct Incoming {
: Reg(Reg), Block(Block), UpdatedReg(UpdatedReg) {}
};

Register
createLaneMaskReg(MachineRegisterInfo *MRI,
MachineRegisterInfo::RegisterAttributes LaneMaskRegAttrs);
Register createLaneMaskReg(MachineRegisterInfo *MRI, Register LaneMaskRegAttrs);

class PhiLoweringHelper {
public:
Expand All @@ -49,7 +47,7 @@ class PhiLoweringHelper {
MachineRegisterInfo *MRI = nullptr;
const GCNSubtarget *ST = nullptr;
const SIInstrInfo *TII = nullptr;
MachineRegisterInfo::RegisterAttributes LaneMaskRegAttrs;
Register LaneMaskRegAttrs;

#ifndef NDEBUG
DenseSet<Register> PhiRegisters;
Expand All @@ -70,8 +68,7 @@ class PhiLoweringHelper {
getSaluInsertionAtEnd(MachineBasicBlock &MBB) const;

void initializeLaneMaskRegisterAttributes(Register LaneMask) {
LaneMaskRegAttrs.RCOrRB = &MRI->getRegClassOrRegBank(LaneMask);
LaneMaskRegAttrs.Ty = MRI->getType(LaneMask);
LaneMaskRegAttrs = LaneMask;
}

bool isLaneMaskReg(Register Reg) const {
Expand All @@ -94,7 +91,7 @@ class PhiLoweringHelper {
MachineBasicBlock::iterator I,
const DebugLoc &DL, Register DstReg,
Register PrevReg, Register CurReg) = 0;
virtual void constrainAsLaneMask(Incoming &In) = 0;
virtual void constrainIncomingRegisterTakenAsIs(Incoming &In) = 0;
};

} // end namespace llvm
Loading