Skip to content

Commit 3240ae7

Browse files
committed
AMDGPU/GlobalISel: Set dead on scc on manually selected instructions
In SelectionDAG, InstrEmitter automatically puts dead flags on unused physreg defs everywhere. The generated selectors should also set the dead flag on physreg defs that were not used in the pattern.
1 parent b299ec1 commit 3240ae7

27 files changed

+904
-316
lines changed

llvm/include/llvm/CodeGen/MachineInstrBuilder.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -281,6 +281,11 @@ class MachineInstrBuilder {
281281
return *this;
282282
}
283283

284+
const MachineInstrBuilder &setOperandDead(unsigned OpIdx) const {
285+
MI->getOperand(OpIdx).setIsDead();
286+
return *this;
287+
}
288+
284289
// Add a displacement from an existing MachineOperand with an added offset.
285290
const MachineInstrBuilder &addDisp(const MachineOperand &Disp, int64_t off,
286291
unsigned char TargetFlags = 0) const {

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

Lines changed: 52 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -159,11 +159,15 @@ bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
159159

160160
// TODO: Skip masking high bits if def is known boolean.
161161

162+
bool IsSGPR = TRI.isSGPRClass(SrcRC);
162163
unsigned AndOpc =
163-
TRI.isSGPRClass(SrcRC) ? AMDGPU::S_AND_B32 : AMDGPU::V_AND_B32_e32;
164-
BuildMI(*BB, &I, DL, TII.get(AndOpc), MaskedReg)
164+
IsSGPR ? AMDGPU::S_AND_B32 : AMDGPU::V_AND_B32_e32;
165+
auto And = BuildMI(*BB, &I, DL, TII.get(AndOpc), MaskedReg)
165166
.addImm(1)
166167
.addReg(SrcReg);
168+
if (IsSGPR)
169+
And.setOperandDead(3); // Dead scc
170+
167171
BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
168172
.addImm(0)
169173
.addReg(MaskedReg);
@@ -323,7 +327,8 @@ bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const {
323327
MachineInstr *Add =
324328
BuildMI(*BB, &I, DL, TII.get(Opc), DstReg)
325329
.add(I.getOperand(1))
326-
.add(I.getOperand(2));
330+
.add(I.getOperand(2))
331+
.setOperandDead(3); // Dead scc
327332
I.eraseFromParent();
328333
return constrainSelectedInstRegOperands(*Add, TII, TRI, RBI);
329334
}
@@ -370,7 +375,8 @@ bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const {
370375
.add(Lo2);
371376
BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
372377
.add(Hi1)
373-
.add(Hi2);
378+
.add(Hi2)
379+
.setOperandDead(3); // Dead scc
374380
} else {
375381
const TargetRegisterClass *CarryRC = TRI.getWaveMaskRegClass();
376382
Register CarryReg = MRI->createVirtualRegister(CarryRC);
@@ -437,14 +443,18 @@ bool AMDGPUInstructionSelector::selectG_UADDO_USUBO_UADDE_USUBE(
437443
unsigned NoCarryOpc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
438444
unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;
439445

440-
BuildMI(*BB, &I, DL, TII.get(HasCarryIn ? CarryOpc : NoCarryOpc), Dst0Reg)
446+
auto CarryInst = BuildMI(*BB, &I, DL, TII.get(HasCarryIn ? CarryOpc : NoCarryOpc), Dst0Reg)
441447
.add(I.getOperand(2))
442448
.add(I.getOperand(3));
443-
BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), Dst1Reg)
444-
.addReg(AMDGPU::SCC);
445449

446-
if (!MRI->getRegClassOrNull(Dst1Reg))
447-
MRI->setRegClass(Dst1Reg, &AMDGPU::SReg_32RegClass);
450+
if (MRI->use_nodbg_empty(Dst1Reg)) {
451+
CarryInst.setOperandDead(3); // Dead scc
452+
} else {
453+
BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), Dst1Reg)
454+
.addReg(AMDGPU::SCC);
455+
if (!MRI->getRegClassOrNull(Dst1Reg))
456+
MRI->setRegClass(Dst1Reg, &AMDGPU::SReg_32RegClass);
457+
}
448458

449459
if (!RBI.constrainGenericRegister(Dst0Reg, AMDGPU::SReg_32RegClass, *MRI) ||
450460
!RBI.constrainGenericRegister(Src0Reg, AMDGPU::SReg_32RegClass, *MRI) ||
@@ -741,7 +751,8 @@ bool AMDGPUInstructionSelector::selectG_BUILD_VECTOR(MachineInstr &MI) const {
741751
// build_vector_trunc (lshr $src0, 16), 0 -> s_lshr_b32 $src0, 16
742752
auto MIB = BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_LSHR_B32), Dst)
743753
.addReg(ShiftSrc0)
744-
.addImm(16);
754+
.addImm(16)
755+
.setOperandDead(3); // Dead scc
745756

746757
MI.eraseFromParent();
747758
return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
@@ -1630,7 +1641,8 @@ bool AMDGPUInstructionSelector::selectDSGWSIntrinsic(MachineInstr &MI,
16301641
Register M0Base = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
16311642
BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::S_LSHL_B32), M0Base)
16321643
.addReg(BaseOffset)
1633-
.addImm(16);
1644+
.addImm(16)
1645+
.setOperandDead(3); // Dead scc
16341646

16351647
BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
16361648
.addReg(M0Base);
@@ -2195,7 +2207,8 @@ bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const {
21952207
} else {
21962208
BuildMI(*MBB, I, DL, TII.get(AMDGPU::S_LSHL_B32), TmpReg0)
21972209
.addReg(HiReg)
2198-
.addImm(16);
2210+
.addImm(16)
2211+
.setOperandDead(3); // Dead scc
21992212
}
22002213

22012214
unsigned MovOpc = IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;
@@ -2204,12 +2217,17 @@ bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const {
22042217

22052218
BuildMI(*MBB, I, DL, TII.get(MovOpc), ImmReg)
22062219
.addImm(0xffff);
2207-
BuildMI(*MBB, I, DL, TII.get(AndOpc), TmpReg1)
2220+
auto And = BuildMI(*MBB, I, DL, TII.get(AndOpc), TmpReg1)
22082221
.addReg(LoReg)
22092222
.addReg(ImmReg);
2210-
BuildMI(*MBB, I, DL, TII.get(OrOpc), DstReg)
2223+
auto Or = BuildMI(*MBB, I, DL, TII.get(OrOpc), DstReg)
22112224
.addReg(TmpReg0)
22122225
.addReg(TmpReg1);
2226+
2227+
if (!IsVALU) {
2228+
And.setOperandDead(3); // Dead scc
2229+
Or.setOperandDead(3); // Dead scc
2230+
}
22132231
}
22142232

22152233
I.eraseFromParent();
@@ -2354,7 +2372,8 @@ bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
23542372
if (Signed) {
23552373
BuildMI(MBB, I, DL, TII.get(AMDGPU::S_ASHR_I32), HiReg)
23562374
.addReg(SrcReg, 0, SubReg)
2357-
.addImm(31);
2375+
.addImm(31)
2376+
.setOperandDead(3); // Dead scc
23582377
} else {
23592378
BuildMI(MBB, I, DL, TII.get(AMDGPU::S_MOV_B32), HiReg)
23602379
.addImm(0);
@@ -2398,7 +2417,8 @@ bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
23982417
if (!Signed && shouldUseAndMask(SrcSize, Mask)) {
23992418
BuildMI(MBB, I, DL, TII.get(AMDGPU::S_AND_B32), DstReg)
24002419
.addReg(SrcReg)
2401-
.addImm(Mask);
2420+
.addImm(Mask)
2421+
.setOperandDead(3); // Dead scc
24022422
} else {
24032423
BuildMI(MBB, I, DL, TII.get(BFE32), DstReg)
24042424
.addReg(SrcReg)
@@ -2532,7 +2552,8 @@ bool AMDGPUInstructionSelector::selectG_FNEG(MachineInstr &MI) const {
25322552
unsigned Opc = Fabs ? AMDGPU::S_OR_B32 : AMDGPU::S_XOR_B32;
25332553
BuildMI(*BB, &MI, DL, TII.get(Opc), OpReg)
25342554
.addReg(HiReg)
2535-
.addReg(ConstReg);
2555+
.addReg(ConstReg)
2556+
.setOperandDead(3); // Dead scc
25362557
BuildMI(*BB, &MI, DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)
25372558
.addReg(LoReg)
25382559
.addImm(AMDGPU::sub0)
@@ -2573,7 +2594,8 @@ bool AMDGPUInstructionSelector::selectG_FABS(MachineInstr &MI) const {
25732594
// TODO: Should this used S_BITSET0_*?
25742595
BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_AND_B32), OpReg)
25752596
.addReg(HiReg)
2576-
.addReg(ConstReg);
2597+
.addReg(ConstReg)
2598+
.setOperandDead(3); // Dead scc
25772599
BuildMI(*BB, &MI, DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)
25782600
.addReg(LoReg)
25792601
.addImm(AMDGPU::sub0)
@@ -2731,7 +2753,8 @@ bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
27312753
Register TmpReg = MRI->createVirtualRegister(TRI.getBoolRC());
27322754
BuildMI(*BB, &I, DL, TII.get(Opcode), TmpReg)
27332755
.addReg(CondReg)
2734-
.addReg(Exec);
2756+
.addReg(Exec)
2757+
.setOperandDead(3); // Dead scc
27352758
CondReg = TmpReg;
27362759
}
27372760

@@ -2794,7 +2817,8 @@ bool AMDGPUInstructionSelector::selectG_PTRMASK(MachineInstr &I) const {
27942817
!CanCopyLow32 && !CanCopyHi32) {
27952818
auto MIB = BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_AND_B64), DstReg)
27962819
.addReg(SrcReg)
2797-
.addReg(MaskReg);
2820+
.addReg(MaskReg)
2821+
.setOperandDead(3); // Dead scc
27982822
I.eraseFromParent();
27992823
return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
28002824
}
@@ -2817,9 +2841,12 @@ bool AMDGPUInstructionSelector::selectG_PTRMASK(MachineInstr &I) const {
28172841
assert(MaskTy.getSizeInBits() == 32 &&
28182842
"ptrmask should have been narrowed during legalize");
28192843

2820-
BuildMI(*BB, &I, DL, TII.get(NewOpc), DstReg)
2844+
auto NewOp = BuildMI(*BB, &I, DL, TII.get(NewOpc), DstReg)
28212845
.addReg(SrcReg)
28222846
.addReg(MaskReg);
2847+
2848+
if (!IsVGPR)
2849+
NewOp.setOperandDead(3); // Dead scc
28232850
I.eraseFromParent();
28242851
return true;
28252852
}
@@ -3325,7 +3352,8 @@ bool AMDGPUInstructionSelector::selectWaveAddress(MachineInstr &MI) const {
33253352
} else {
33263353
BuildMI(*MBB, MI, DL, TII.get(AMDGPU::S_LSHR_B32), DstReg)
33273354
.addReg(SrcReg)
3328-
.addImm(Subtarget->getWavefrontSizeLog2());
3355+
.addImm(Subtarget->getWavefrontSizeLog2())
3356+
.setOperandDead(3); // Dead scc
33293357
}
33303358

33313359
const TargetRegisterClass &RC =
@@ -4114,7 +4142,8 @@ AMDGPUInstructionSelector::selectScratchSAddr(MachineOperand &Root) const {
41144142

41154143
BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_I32), SAddr)
41164144
.addFrameIndex(FI)
4117-
.addReg(RHSDef->Reg);
4145+
.addReg(RHSDef->Reg)
4146+
.setOperandDead(3); // Dead scc
41184147
}
41194148
}
41204149

0 commit comments

Comments
 (0)