Skip to content

Commit a3db9c0

Browse files
Yi-Hong LyuYi-Hong Lyu
authored and committed
[PowerPC] Remove redundant CRSET/CRUNSET in custom lowering of known CR bit spills
We lower known CR bit spills (CRSET/CRUNSET) to load and spill the known value, but forgot to remove the now-redundant CRSET/CRUNSET instructions.

For example, this sequence was used to spill a CRUNSET:

    crclr 4*cr5+lt
    mfocrf r3,4
    rlwinm r3,r3,20,0,0
    stw r3,132(r1)

Custom lowering of known CR bit spills lowers it to:

    crxor 4*cr5+lt, 4*cr5+lt, 4*cr5+lt
    li r3,0
    stw r3,132(r1)

The crxor is redundant if there is no use of 4*cr5+lt, so we should remove it.

Differential revision: https://reviews.llvm.org/D67722
1 parent 9ee76ab commit a3db9c0

File tree

4 files changed

+42
-5
lines changed

4 files changed

+42
-5
lines changed

llvm/lib/Target/PowerPC/PPCInstrInfo.td

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3184,6 +3184,11 @@ def ADDIStocHA : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, tocentr
31843184
// the function label.
31853185
def UpdateGBR : PPCEmitTimePseudo<(outs gprc:$rD, gprc:$rT), (ins gprc:$rI), "#UpdateGBR", []>;
31863186

3187+
// Pseudo-instruction marked for deletion. When deleting the instruction would
3188+
// cause iterator invalidation in MIR transformation passes, this pseudo can be
3189+
// used instead. It will be removed unconditionally at pre-emit time (prior to
3190+
// branch selection).
3191+
def UNENCODED_NOP: PPCEmitTimePseudo<(outs), (ins), "#UNENCODED_NOP", []>;
31873192

31883193
// Standard shifts. These are represented separately from the real shifts above
31893194
// so that we can distinguish between shifts that allow 5-bit and 6-bit shift

llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -163,8 +163,19 @@ namespace {
163163
}
164164

165165
bool runOnMachineFunction(MachineFunction &MF) override {
166-
if (skipFunction(MF.getFunction()) || !RunPreEmitPeephole)
166+
if (skipFunction(MF.getFunction()) || !RunPreEmitPeephole) {
167+
// Remove UNENCODED_NOP even when this pass is disabled.
168+
// This needs to be done unconditionally so we don't emit zeros
169+
// in the instruction stream.
170+
SmallVector<MachineInstr *, 4> InstrsToErase;
171+
for (MachineBasicBlock &MBB : MF)
172+
for (MachineInstr &MI : MBB)
173+
if (MI.getOpcode() == PPC::UNENCODED_NOP)
174+
InstrsToErase.push_back(&MI);
175+
for (MachineInstr *MI : InstrsToErase)
176+
MI->eraseFromParent();
167177
return false;
178+
}
168179
bool Changed = false;
169180
const PPCInstrInfo *TII = MF.getSubtarget<PPCSubtarget>().getInstrInfo();
170181
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
@@ -173,6 +184,10 @@ namespace {
173184
Changed |= removeRedundantLIs(MBB, TRI);
174185
for (MachineInstr &MI : MBB) {
175186
unsigned Opc = MI.getOpcode();
187+
if (Opc == PPC::UNENCODED_NOP) {
188+
InstrsToErase.push_back(&MI);
189+
continue;
190+
}
176191
// Detect self copies - these can result from running AADB.
177192
if (PPCInstrInfo::isSameClassPhysRegCopy(Opc)) {
178193
const MCInstrDesc &MCID = TII->get(Opc);

llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -747,12 +747,18 @@ void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II,
747747
Register SrcReg = MI.getOperand(0).getReg();
748748

749749
// Search up the BB to find the definition of the CR bit.
750-
MachineBasicBlock::reverse_iterator Ins;
750+
MachineBasicBlock::reverse_iterator Ins = MI;
751+
MachineBasicBlock::reverse_iterator Rend = MBB.rend();
752+
++Ins;
751753
unsigned CRBitSpillDistance = 0;
752-
for (Ins = MI; Ins != MBB.rend(); Ins++) {
754+
bool SeenUse = false;
755+
for (; Ins != Rend; ++Ins) {
753756
// Definition found.
754757
if (Ins->modifiesRegister(SrcReg, TRI))
755758
break;
759+
// Use found.
760+
if (Ins->readsRegister(SrcReg, TRI))
761+
SeenUse = true;
756762
// Unable to find CR bit definition within maximum search distance.
757763
if (CRBitSpillDistance == MaxCRBitSpillDist) {
758764
Ins = MI;
@@ -767,15 +773,18 @@ void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II,
767773
if (Ins == MBB.rend())
768774
Ins = MI;
769775

776+
bool SpillsKnownBit = false;
770777
// There is no need to extract the CR bit if its value is already known.
771778
switch (Ins->getOpcode()) {
772779
case PPC::CRUNSET:
773780
BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::LI8 : PPC::LI), Reg)
774781
.addImm(0);
782+
SpillsKnownBit = true;
775783
break;
776784
case PPC::CRSET:
777785
BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::LIS8 : PPC::LIS), Reg)
778786
.addImm(-32768);
787+
SpillsKnownBit = true;
779788
break;
780789
default:
781790
// We need to move the CR field that contains the CR bit we are spilling.
@@ -803,8 +812,13 @@ void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II,
803812
.addReg(Reg, RegState::Kill),
804813
FrameIndex);
805814

815+
bool KillsCRBit = MI.killsRegister(SrcReg, TRI);
806816
// Discard the pseudo instruction.
807817
MBB.erase(II);
818+
if (SpillsKnownBit && KillsCRBit && !SeenUse) {
819+
Ins->setDesc(TII.get(PPC::UNENCODED_NOP));
820+
Ins->RemoveOperand(0);
821+
}
808822
}
809823

810824
void PPCRegisterInfo::lowerCRBitRestore(MachineBasicBlock::iterator II,

llvm/test/CodeGen/PowerPC/knowCRBitSpill.ll

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@
22
; RUN: -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | FileCheck %s
33
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 \
44
; RUN: -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | FileCheck %s
5+
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -ppc-late-peephole=false \
6+
; RUN: -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | FileCheck %s \
7+
; RUN: --implicit-check-not creqv --implicit-check-not crxor
58

69

710
; For known CRBit spills, CRSET/CRUNSET, it is more efficient to just load and
@@ -21,7 +24,7 @@ define dso_local signext i32 @spillCRSET(i32 signext %p1, i32 signext %p2) {
2124
; CHECK-DAG: mfocrf [[REG2:.*]], [[CREG]]
2225
; CHECK-DAG: rlwinm [[REG2]], [[REG2]]
2326
; CHECK: .LBB0_3:
24-
; CHECK-DAG: creqv [[CREG:.*]]*cr5+lt, [[CREG]]*cr5+lt, [[CREG]]*cr5+lt
27+
; CHECK-NOT: #UNENCODED_NOP
2528
; CHECK: lis [[REG1:.*]], -32768
2629
; CHECK: .LBB0_4:
2730
; CHECK-NOT: mfocrf [[REG2:.*]], [[CREG]]
@@ -81,8 +84,8 @@ if.end13: ; preds = %if.then6, %for.end,
8184
define dso_local signext i32 @spillCRUNSET(%struct.p5rx* readonly %p1, i32 signext %p2, i32 signext %p3) {
8285
; CHECK-LABEL: spillCRUNSET:
8386
; CHECK: # %bb.0: # %entry
84-
; CHECK-DAG: crxor [[CREG:.*]]*cr5+lt, [[CREG]]*cr5+lt, [[CREG]]*cr5+lt
8587
; CHECK-DAG: li [[REG1:.*]], 0
88+
; CHECK-NOT: #UNENCODED_NOP
8689
; CHECK-NOT: mfocrf [[REG2:.*]], [[CREG]]
8790
; CHECK-NOT: rlwinm [[REG2]], [[REG2]]
8891
; CHECK: stw [[REG1]]

0 commit comments

Comments
 (0)