Skip to content

Commit 6524a7a

Browse files
committed
[AMDGPU]: PHI Elimination hooks added for custom COPY insertion. Fixed
Differential Revision: https://reviews.llvm.org/D67101 Reviewers: rampitec, vpykhtin llvm-svn: 372086
1 parent 95b28a4 commit 6524a7a

File tree

7 files changed

+176
-30
lines changed

7 files changed

+176
-30
lines changed

llvm/include/llvm/CodeGen/TargetInstrInfo.h

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#include "llvm/CodeGen/MachineCombinerPattern.h"
2323
#include "llvm/CodeGen/MachineFunction.h"
2424
#include "llvm/CodeGen/MachineInstr.h"
25+
#include "llvm/CodeGen/MachineInstrBuilder.h"
2526
#include "llvm/CodeGen/MachineLoopInfo.h"
2627
#include "llvm/CodeGen/MachineOperand.h"
2728
#include "llvm/CodeGen/MachineOutliner.h"
@@ -1638,6 +1639,28 @@ class TargetInstrInfo : public MCInstrInfo {
16381639
return false;
16391640
}
16401641

1642+
/// During PHI elimination, lets the target make any necessary checks and
1643+
/// insert the copy to the PHI destination register in a target-specific
1644+
/// manner.
///
/// The default implementation builds a plain COPY of \p Src into \p Dst at
/// \p InsPt in \p MBB, using \p DL as the debug location.
///
/// \returns the newly created copy instruction.
1645+
virtual MachineInstr *createPHIDestinationCopy(
1646+
MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt,
1647+
const DebugLoc &DL, Register Src, Register Dst) const {
1648+
return BuildMI(MBB, InsPt, DL, get(TargetOpcode::COPY), Dst)
1649+
.addReg(Src);
1650+
}
1651+
1652+
/// During PHI elimination, lets the target make any necessary checks and
1653+
/// insert the copy of a PHI source (incoming) value in a target-specific
1654+
/// manner.
///
/// The default implementation builds a plain COPY into \p Dst from \p Src,
/// passing \p SrcSubReg as the sub-register argument of the source operand,
/// at \p InsPt in \p MBB with debug location \p DL.
///
/// NOTE(review): \p SrcSubReg is forwarded as the third argument of addReg();
/// it is declared as Register here - confirm that is the intended type.
///
/// \returns the newly created copy instruction.
1655+
virtual MachineInstr *createPHISourceCopy(MachineBasicBlock &MBB,
1656+
MachineBasicBlock::iterator InsPt,
1657+
const DebugLoc &DL, Register Src,
1658+
Register SrcSubReg,
1659+
Register Dst) const {
1660+
return BuildMI(MBB, InsPt, DL, get(TargetOpcode::COPY), Dst)
1661+
.addReg(Src, 0, SrcSubReg);
1662+
}
1663+
16411664
/// Returns a \p outliner::OutlinedFunction struct containing target-specific
16421665
/// information for a set of outlining candidates.
16431666
virtual outliner::OutlinedFunction getOutliningCandidateInfo(

llvm/lib/CodeGen/PHIElimination.cpp

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,9 @@
3131
#include "llvm/CodeGen/MachineRegisterInfo.h"
3232
#include "llvm/CodeGen/SlotIndexes.h"
3333
#include "llvm/CodeGen/TargetInstrInfo.h"
34+
#include "llvm/CodeGen/TargetLowering.h"
3435
#include "llvm/CodeGen/TargetOpcodes.h"
36+
#include "llvm/CodeGen/TargetPassConfig.h"
3537
#include "llvm/CodeGen/TargetRegisterInfo.h"
3638
#include "llvm/CodeGen/TargetSubtargetInfo.h"
3739
#include "llvm/Pass.h"
@@ -252,11 +254,12 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
252254
// Insert a register to register copy at the top of the current block (but
253255
// after any remaining phi nodes) which copies the new incoming register
254256
// into the phi node destination.
257+
MachineInstr *PHICopy = nullptr;
255258
const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
256259
if (allPhiOperandsUndefined(*MPhi, *MRI))
257260
// If all sources of a PHI node are implicit_def or undef uses, just emit an
258261
// implicit_def instead of a copy.
259-
BuildMI(MBB, AfterPHIsIt, MPhi->getDebugLoc(),
262+
PHICopy = BuildMI(MBB, AfterPHIsIt, MPhi->getDebugLoc(),
260263
TII->get(TargetOpcode::IMPLICIT_DEF), DestReg);
261264
else {
262265
// Can we reuse an earlier PHI node? This only happens for critical edges,
@@ -273,15 +276,13 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
273276
const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(DestReg);
274277
entry = IncomingReg = MF.getRegInfo().createVirtualRegister(RC);
275278
}
276-
BuildMI(MBB, AfterPHIsIt, MPhi->getDebugLoc(),
277-
TII->get(TargetOpcode::COPY), DestReg)
278-
.addReg(IncomingReg);
279+
// Give the target the possibility to handle special cases; fall back to a plain COPY otherwise.
280+
PHICopy = TII->createPHIDestinationCopy(MBB, AfterPHIsIt, MPhi->getDebugLoc(),
281+
IncomingReg, DestReg);
279282
}
280283

281284
// Update live variable information if there is any.
282285
if (LV) {
283-
MachineInstr &PHICopy = *std::prev(AfterPHIsIt);
284-
285286
if (IncomingReg) {
286287
LiveVariables::VarInfo &VI = LV->getVarInfo(IncomingReg);
287288

@@ -302,7 +303,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
302303
// killed. Note that because the value is defined in several places (once
303304
// each for each incoming block), the "def" block and instruction fields
304305
// for the VarInfo is not filled in.
305-
LV->addVirtualRegisterKilled(IncomingReg, PHICopy);
306+
LV->addVirtualRegisterKilled(IncomingReg, *PHICopy);
306307
}
307308

308309
// Since we are going to be deleting the PHI node, if it is the last use of
@@ -312,15 +313,14 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
312313

313314
// If the result is dead, update LV.
314315
if (isDead) {
315-
LV->addVirtualRegisterDead(DestReg, PHICopy);
316+
LV->addVirtualRegisterDead(DestReg, *PHICopy);
316317
LV->removeVirtualRegisterDead(DestReg, *MPhi);
317318
}
318319
}
319320

320321
// Update LiveIntervals for the new copy or implicit def.
321322
if (LIS) {
322-
SlotIndex DestCopyIndex =
323-
LIS->InsertMachineInstrInMaps(*std::prev(AfterPHIsIt));
323+
SlotIndex DestCopyIndex = LIS->InsertMachineInstrInMaps(*PHICopy);
324324

325325
SlotIndex MBBStartIndex = LIS->getMBBStartIdx(&MBB);
326326
if (IncomingReg) {
@@ -406,9 +406,9 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
406406
if (DefMI->isImplicitDef())
407407
ImpDefs.insert(DefMI);
408408
} else {
409-
NewSrcInstr = BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(),
410-
TII->get(TargetOpcode::COPY), IncomingReg)
411-
.addReg(SrcReg, 0, SrcSubReg);
409+
NewSrcInstr =
410+
TII->createPHISourceCopy(opBlock, InsertPos, MPhi->getDebugLoc(),
411+
SrcReg, SrcSubReg, IncomingReg);
412412
}
413413
}
414414

@@ -457,7 +457,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
457457
}
458458
} else {
459459
// We just inserted this copy.
460-
KillInst = std::prev(InsertPos);
460+
KillInst = NewSrcInstr;
461461
}
462462
}
463463
assert(KillInst->readsRegister(SrcReg) && "Cannot find kill instruction");

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6410,3 +6410,40 @@ bool llvm::execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI,
64106410
return true;
64116411
}
64126412
}
6413+
6414+
/// AMDGPU override of the PHI-elimination hook that inserts the copy into the
/// PHI destination register \p Dst.
///
/// Walks \p MBB from its first instruction towards \p LastPHIIt. If a non-PHI
/// instruction that reads \p Dst is found on the way, the COPY from \p Src is
/// inserted directly at that instruction. Otherwise the generic
/// TargetInstrInfo implementation is used with \p LastPHIIt as the insertion
/// point.
///
/// \returns the newly created copy instruction.
MachineInstr *SIInstrInfo::createPHIDestinationCopy(
6415+
MachineBasicBlock &MBB, MachineBasicBlock::iterator LastPHIIt,
6416+
const DebugLoc &DL, Register Src, Register Dst) const {
6417+
auto Cur = MBB.begin();
6418+
// Guard against an empty block before entering the do/while below.
if (Cur != MBB.end())
6419+
do {
6420+
// The first non-PHI reader of Dst must see the copied value, so the copy
// is placed at that instruction.
if (!Cur->isPHI() && Cur->readsRegister(Dst))
6421+
return BuildMI(MBB, Cur, DL, get(TargetOpcode::COPY), Dst).addReg(Src);
6422+
++Cur;
6423+
// Note: the instruction at MBB.begin() is always examined, even when it is
// LastPHIIt itself, because the condition is tested after the body.
} while (Cur != MBB.end() && Cur != LastPHIIt);
6424+
6425+
// No early reader of Dst was found: use the default placement.
return TargetInstrInfo::createPHIDestinationCopy(MBB, LastPHIIt, DL, Src,
6426+
Dst);
6427+
}
6428+
6429+
/// AMDGPU override of the PHI-elimination hook that inserts the copy of a PHI
/// source value \p Src (sub-register \p SrcSubReg) into \p Dst.
///
/// If the insertion point \p InsPt is an SI_IF, SI_ELSE or SI_IF_BREAK that
/// itself defines \p Src, the copy cannot be placed in front of it. In that
/// case the copy is emitted right after that instruction as an
/// S_MOV_B32_term / S_MOV_B64_term (selected by wave size) with an implicit
/// use of EXEC. In every other case the generic TargetInstrInfo
/// implementation is used.
///
/// The caller-provided \p DL is used as the debug location of the new
/// instruction. The iterator following the control-flow pseudo is never
/// dereferenced, since it may be MBB.end().
///
/// \returns the newly created copy instruction.
MachineInstr *SIInstrInfo::createPHISourceCopy(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt,
    const DebugLoc &DL, Register Src, Register SrcSubReg, Register Dst) const {
  if (InsPt != MBB.end() &&
      (InsPt->getOpcode() == AMDGPU::SI_IF ||
       InsPt->getOpcode() == AMDGPU::SI_ELSE ||
       InsPt->getOpcode() == AMDGPU::SI_IF_BREAK) &&
      InsPt->definesRegister(Src)) {
    // Src is produced by the control-flow pseudo at InsPt, so the copy has to
    // be inserted after it.
    ++InsPt;
    return BuildMI(MBB, InsPt, DL,
                   get(ST.isWave32() ? AMDGPU::S_MOV_B32_term
                                     : AMDGPU::S_MOV_B64_term),
                   Dst)
        .addReg(Src, 0, SrcSubReg)
        .addReg(AMDGPU::EXEC, RegState::Implicit);
  }
  return TargetInstrInfo::createPHISourceCopy(MBB, InsPt, DL, Src, SrcSubReg,
                                              Dst);
}
6448+
6449+
/// Forwards to the subtarget's isWave32() query so that users holding only a
/// SIInstrInfo can ask for the wave size mode.
bool llvm::SIInstrInfo::isWave32() const { return ST.isWave32(); }

llvm/lib/Target/AMDGPU/SIInstrInfo.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -954,6 +954,19 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
954954

955955
bool isBasicBlockPrologue(const MachineInstr &MI) const override;
956956

957+
/// PHI-elimination hook: insert the copy from \p Src into the PHI destination
/// register \p Dst in \p MBB and return the new instruction.
MachineInstr *createPHIDestinationCopy(MachineBasicBlock &MBB,
958+
MachineBasicBlock::iterator InsPt,
959+
const DebugLoc &DL, Register Src,
960+
Register Dst) const override;
961+
962+
/// PHI-elimination hook: insert the copy of the PHI source value \p Src
/// (sub-register \p SrcSubReg) into \p Dst in \p MBB and return the new
/// instruction.
MachineInstr *createPHISourceCopy(MachineBasicBlock &MBB,
963+
MachineBasicBlock::iterator InsPt,
964+
const DebugLoc &DL, Register Src,
965+
Register SrcSubReg,
966+
Register Dst) const override;
967+
968+
/// Returns the result of the subtarget's isWave32() query.
bool isWave32() const;
969+
957970
/// Return a partially built integer add instruction without carry.
958971
/// Caller must add source operands.
959972
/// For pre-GFX9 it will generate unused carry destination operand.

llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp

Lines changed: 34 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,8 @@ class SILowerControlFlow : public MachineFunctionPass {
9898
void emitLoop(MachineInstr &MI);
9999
void emitEndCf(MachineInstr &MI);
100100

101+
Register getSaveExec(MachineInstr* MI);
102+
101103
void findMaskOperands(MachineInstr &MI, unsigned OpNo,
102104
SmallVectorImpl<MachineOperand> &Src) const;
103105

@@ -175,17 +177,31 @@ static bool isSimpleIf(const MachineInstr &MI, const MachineRegisterInfo *MRI,
175177
return true;
176178
}
177179

180+
/// Returns the register that should receive the saved exec mask for the
/// control-flow pseudo \p MI.
///
/// By default this is the register of operand 0 of \p MI. If the instruction
/// immediately following \p MI is an S_MOV_B32_term / S_MOV_B64_term (chosen
/// by wave size) whose operand 1 is that same register, that move is erased
/// and its destination register (operand 0) is returned instead.
///
/// NOTE(review): the erased move is not removed from any LiveIntervals maps
/// here - confirm this is not required when LIS is available.
Register SILowerControlFlow::getSaveExec(MachineInstr *MI) {
181+
MachineBasicBlock *MBB = MI->getParent();
182+
MachineOperand &SaveExec = MI->getOperand(0);
183+
assert(SaveExec.getSubReg() == AMDGPU::NoSubRegister);
184+
185+
Register SaveExecReg = SaveExec.getReg();
186+
// Opcode of the terminator move to look for, depending on wave size.
unsigned FalseTermOpc =
187+
TII->isWave32() ? AMDGPU::S_MOV_B32_term : AMDGPU::S_MOV_B64_term;
188+
MachineBasicBlock::iterator I = (MI);
189+
MachineBasicBlock::iterator J = std::next(I);
190+
// Fold a directly following terminator move of the saved mask: use its
// destination as the result register and drop the move.
if (J != MBB->end() && J->getOpcode() == FalseTermOpc &&
191+
J->getOperand(1).isReg() && J->getOperand(1).getReg() == SaveExecReg) {
192+
SaveExecReg = J->getOperand(0).getReg();
193+
J->eraseFromParent();
194+
}
195+
return SaveExecReg;
196+
}
197+
178198
void SILowerControlFlow::emitIf(MachineInstr &MI) {
179199
MachineBasicBlock &MBB = *MI.getParent();
180200
const DebugLoc &DL = MI.getDebugLoc();
181201
MachineBasicBlock::iterator I(&MI);
182-
183-
MachineOperand &SaveExec = MI.getOperand(0);
184-
MachineOperand &Cond = MI.getOperand(1);
185-
assert(SaveExec.getSubReg() == AMDGPU::NoSubRegister &&
186-
Cond.getSubReg() == AMDGPU::NoSubRegister);
187-
188-
Register SaveExecReg = SaveExec.getReg();
202+
Register SaveExecReg = getSaveExec(&MI);
203+
MachineOperand& Cond = MI.getOperand(1);
204+
assert(Cond.getSubReg() == AMDGPU::NoSubRegister);
189205

190206
MachineOperand &ImpDefSCC = MI.getOperand(4);
191207
assert(ImpDefSCC.getReg() == AMDGPU::SCC && ImpDefSCC.isDef());
@@ -266,8 +282,7 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {
266282
MachineBasicBlock &MBB = *MI.getParent();
267283
const DebugLoc &DL = MI.getDebugLoc();
268284

269-
Register DstReg = MI.getOperand(0).getReg();
270-
assert(MI.getOperand(0).getSubReg() == AMDGPU::NoSubRegister);
285+
Register DstReg = getSaveExec(&MI);
271286

272287
bool ExecModified = MI.getOperand(3).getImm() != 0;
273288
MachineBasicBlock::iterator Start = MBB.begin();
@@ -339,7 +354,7 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {
339354
void SILowerControlFlow::emitIfBreak(MachineInstr &MI) {
340355
MachineBasicBlock &MBB = *MI.getParent();
341356
const DebugLoc &DL = MI.getDebugLoc();
342-
auto Dst = MI.getOperand(0).getReg();
357+
auto Dst = getSaveExec(&MI);
343358

344359
// Skip ANDing with exec if the break condition is already masked by exec
345360
// because it is a V_CMP in the same basic block. (We know the break
@@ -400,13 +415,17 @@ void SILowerControlFlow::emitLoop(MachineInstr &MI) {
400415

401416
void SILowerControlFlow::emitEndCf(MachineInstr &MI) {
402417
MachineBasicBlock &MBB = *MI.getParent();
418+
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
419+
unsigned CFMask = MI.getOperand(0).getReg();
420+
MachineInstr *Def = MRI.getUniqueVRegDef(CFMask);
403421
const DebugLoc &DL = MI.getDebugLoc();
404422

405-
MachineBasicBlock::iterator InsPt = MBB.begin();
406-
MachineInstr *NewMI =
407-
BuildMI(MBB, InsPt, DL, TII->get(OrOpc), Exec)
408-
.addReg(Exec)
409-
.add(MI.getOperand(0));
423+
MachineBasicBlock::iterator InsPt =
424+
Def && Def->getParent() == &MBB ? std::next(MachineBasicBlock::iterator(Def))
425+
: MBB.begin();
426+
MachineInstr *NewMI = BuildMI(MBB, InsPt, DL, TII->get(OrOpc), Exec)
427+
.addReg(Exec)
428+
.add(MI.getOperand(0));
410429

411430
if (LIS)
412431
LIS->ReplaceMachineInstrInMaps(MI, *NewMI);

llvm/test/CodeGen/AMDGPU/phi-elimination-assertion.mir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,8 @@ body: |
2626

2727
# CHECK-LABEL: name: foo
2828
# CHECK: bb.3:
29-
# CHECK-NEXT: %3:sreg_32_xm0 = COPY killed %4
3029
# CHECK-NEXT: dead %2:sreg_32_xm0 = IMPLICIT_DEF
30+
# CHECK-NEXT: %3:sreg_32_xm0 = COPY killed %4
3131
# CHECK-NEXT: S_NOP 0, implicit killed %3
3232

3333

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
# RUN: llc -mtriple amdgcn -run-pass livevars -run-pass phi-node-elimination -verify-machineinstrs -o - %s | FileCheck %s
2+
3+
# CHECK-LABEL: phi-cf-test
4+
# CHECK: bb.0:
5+
# CHECK: [[COND:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64
6+
# CHECK: [[IF_SOURCE0:%[0-9]+]]:sreg_64 = SI_IF [[COND]], %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
7+
# CHECK: [[IF_INPUT_REG:%[0-9]+]]:sreg_64 = S_MOV_B64_term killed [[IF_SOURCE0]], implicit $exec
8+
9+
# CHECK: bb.1:
10+
# CHECK: [[END_CF_ARG:%[0-9]+]]:sreg_64 = COPY killed [[IF_INPUT_REG]]
11+
# CHECK: SI_END_CF killed [[END_CF_ARG]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
12+
13+
# CHECK: bb.2:
14+
# CHECK: [[IF_SOURCE1:%[0-9]+]]:sreg_64 = SI_IF [[COND]], %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
15+
# CHECK: [[IF_INPUT_REG]]:sreg_64 = S_MOV_B64_term killed [[IF_SOURCE1]], implicit $exec
16+
17+
18+
...
19+
---
20+
name: phi-cf-test
21+
tracksRegLiveness: true
22+
body: |
23+
24+
bb.0:
25+
successors: %bb.3(0x40000000), %bb.2(0x40000000)
26+
liveins: $vgpr0
27+
28+
%5:vgpr_32(s32) = COPY $vgpr0
29+
%0:sreg_64 = V_CMP_EQ_U32_e64 0, %5(s32), implicit $exec
30+
%18:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
31+
%22:sreg_64 = SI_IF %0, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
32+
S_BRANCH %bb.3
33+
34+
bb.2:
35+
successors: %bb.3(0x80000000)
36+
37+
%24:sreg_64 = PHI %20, %bb.3, %22, %bb.0
38+
%23:vgpr_32 = PHI %19, %bb.3, %18, %bb.0
39+
SI_END_CF %24, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
40+
%3:vgpr_32, dead %10:sreg_64 = nsw V_ADD_I32_e64 1, %23, 0, implicit $exec
41+
42+
bb.3:
43+
successors: %bb.3(0x40000000), %bb.2(0x40000000)
44+
45+
%4:vgpr_32 = PHI %19, %bb.3, %3, %bb.2, %18, %bb.0
46+
%15:sreg_32_xm0 = S_MOV_B32 61440
47+
%16:sreg_32_xm0 = S_MOV_B32 -1
48+
%17:sreg_128 = REG_SEQUENCE undef %14:sreg_32_xm0, %subreg.sub0, undef %12:sreg_32_xm0, %subreg.sub1, %16, %subreg.sub2, %15, %subreg.sub3
49+
BUFFER_STORE_DWORD_OFFSET %4, %17, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
50+
%19:vgpr_32 = COPY %4
51+
%20:sreg_64 = SI_IF %0, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
52+
S_BRANCH %bb.3
53+
54+
...

0 commit comments

Comments
 (0)