Skip to content
This repository was archived by the owner on Mar 28, 2020. It is now read-only.

Commit 0e0a424

Browse files
committed
[RegisterCoalescing] Recommit the patch "Remove partial redundant copy".
The recommit fixes a bug related to the live interval update after the partially redundant copy is moved. The original patch solves the performance problem described in PR27827. Register coalescing sometimes cannot remove a copy because of interference. But if we can find a reverse copy in one of the predecessor blocks of the copy, the copy is partially redundant and we may remove the copy partially by moving it to the predecessor block without the reverse copy. Differential Revision: https://reviews.llvm.org/D28585 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@292621 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 1d1dc06 commit 0e0a424

File tree

4 files changed

+625
-0
lines changed

4 files changed

+625
-0
lines changed

lib/CodeGen/RegisterCoalescer.cpp

Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#include "llvm/CodeGen/LiveRangeEdit.h"
2323
#include "llvm/CodeGen/MachineFrameInfo.h"
2424
#include "llvm/CodeGen/MachineInstr.h"
25+
#include "llvm/CodeGen/MachineInstrBuilder.h"
2526
#include "llvm/CodeGen/MachineLoopInfo.h"
2627
#include "llvm/CodeGen/MachineRegisterInfo.h"
2728
#include "llvm/CodeGen/Passes.h"
@@ -189,6 +190,9 @@ namespace {
189190
/// This returns true if an interval was modified.
190191
bool removeCopyByCommutingDef(const CoalescerPair &CP,MachineInstr *CopyMI);
191192

193+
/// We found a copy which can be moved to its less frequent predecessor.
194+
bool removePartialRedundancy(const CoalescerPair &CP, MachineInstr &CopyMI);
195+
192196
/// If the source of a copy is defined by a
193197
/// trivial computation, replace the copy by rematerialize the definition.
194198
bool reMaterializeTrivialDef(const CoalescerPair &CP, MachineInstr *CopyMI,
@@ -861,6 +865,167 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
861865
return true;
862866
}
863867

868+
/// For copy B = A in BB2, if A is defined by A = B in BB0 which is a
/// predecessor of BB2, and if B is not redefined on the way from A = B
/// in BB0 to B = A in BB2, B = A in BB2 is partially redundant if the
/// execution goes through the path from BB0 to BB2. We may move B = A
/// to the predecessor without such reversed copy.
/// So we will transform the program from:
///   BB0:
///      A = B;    BB1:
///       ...         ...
///        /     \   /
///             BB2:
///               ...
///               B = A;
///
/// to:
///
///   BB0:          BB1:
///      A = B;        ...
///       ...          B = A;
///        /     \   /
///             BB2:
///               ...
///
/// A special case is when BB0 and BB2 are the same BB which is the only
/// BB in a loop:
///   BB1:
///        ...
///   BB0/BB2:  ----
///        B = A;   |
///        ...      |
///        A = B;   |
///          |-------
///          |
/// We may hoist B = A from BB0/BB2 to BB1.
///
/// The major preconditions for correctness to remove such partial
/// redundancy include:
/// 1. A in B = A in BB2 is defined by a PHI in BB2, and one operand of
///    the PHI is defined by the reversed copy A = B in BB0.
/// 2. No B is referenced from the start of BB2 to B = A.
/// 3. No B is defined from A = B to the end of BB0.
/// 4. BB1 has only one successor.
///
/// 2 and 4 implicitly ensure B is not live at the end of BB1.
/// 4 guarantees BB2 is hotter than BB1, so we can only move a copy to a
/// colder place, which not only prevent endless loop, but also make sure
/// the movement of copy is beneficial.
bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP,
                                                MachineInstr &CopyMI) {
  // Physical-register pairs are filtered out by the caller; only handle
  // full (non-subregister) virtual copies here.
  assert(!CP.isPhys());
  if (!CopyMI.isFullCopy())
    return false;

  MachineBasicBlock &MBB = *CopyMI.getParent();
  if (MBB.isEHPad())
    return false;

  // The transformation reasons about exactly one "reverse copy" predecessor
  // and one "other" predecessor, so require precisely two.
  if (MBB.pred_size() != 2)
    return false;

  // Normalize so that IntA is the copy source and IntB the copy
  // destination, regardless of how the CoalescerPair was flipped.
  LiveInterval &IntA =
      LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
  LiveInterval &IntB =
      LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());

  // Precondition 1: A is defined by PHI at the entry of MBB.
  SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(true);
  VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx);
  assert(AValNo && !AValNo->isUnused() && "COPY source not live");
  if (!AValNo->isPHIDef())
    return false;

  // Precondition 2: no B is referenced before CopyMI in MBB.
  if (IntB.overlaps(LIS->getMBBStartIdx(&MBB), CopyIdx))
    return false;

  // MBB has two predecessors: one contains A = B so no copy will be inserted
  // for it. The other one will have a copy moved from MBB.
  bool FoundReverseCopy = false;
  MachineBasicBlock *CopyLeftBB = nullptr;
  for (MachineBasicBlock *Pred : MBB.predecessors()) {
    // Find the instruction defining A's value that reaches the end of Pred.
    VNInfo *PVal = IntA.getVNInfoBefore(LIS->getMBBEndIdx(Pred));
    MachineInstr *DefMI = LIS->getInstructionFromIndex(PVal->def);
    if (!DefMI || !DefMI->isFullCopy()) {
      CopyLeftBB = Pred;
      continue;
    }
    // Check DefMI is a reverse copy and it is in BB Pred.
    if (DefMI->getOperand(0).getReg() != IntA.reg ||
        DefMI->getOperand(1).getReg() != IntB.reg ||
        DefMI->getParent() != Pred) {
      CopyLeftBB = Pred;
      continue;
    }
    // Precondition 3: if there is any other def of B after DefMI and before
    // the end of Pred, we need to keep the copy of B = A at the end of Pred
    // if we remove B = A from MBB.
    bool ValB_Changed = false;
    for (auto VNI : IntB.valnos) {
      if (VNI->isUnused())
        continue;
      if (PVal->def < VNI->def && VNI->def < LIS->getMBBEndIdx(Pred)) {
        ValB_Changed = true;
        break;
      }
    }
    if (ValB_Changed) {
      CopyLeftBB = Pred;
      continue;
    }
    FoundReverseCopy = true;
  }

  // If no reverse copy is found in predecessors, nothing to do.
  if (!FoundReverseCopy)
    return false;

  // If CopyLeftBB is nullptr, it means every predecessor of MBB contains
  // reverse copy, CopyMI can be removed trivially if only IntA/IntB is updated.
  // If CopyLeftBB is not nullptr, move CopyMI from MBB to CopyLeftBB and
  // update IntA/IntB.
  //
  // Precondition 4: if CopyLeftBB is not nullptr, ensure CopyLeftBB has a
  // single succ so MBB is hotter than CopyLeftBB.
  if (CopyLeftBB && CopyLeftBB->succ_size() > 1)
    return false;

  // Now ok to move copy.
  if (CopyLeftBB) {
    DEBUG(dbgs() << "\tremovePartialRedundancy: Move the copy to BB#"
                 << CopyLeftBB->getNumber() << '\t' << CopyMI);

    // Insert new copy to CopyLeftBB, just before its terminators, and
    // record it in the slot index maps with a fresh value number for B.
    auto InsPos = CopyLeftBB->getFirstTerminator();
    MachineInstr *NewCopyMI = BuildMI(*CopyLeftBB, InsPos, CopyMI.getDebugLoc(),
                                      TII->get(TargetOpcode::COPY), IntB.reg)
                                  .addReg(IntA.reg);
    SlotIndex NewCopyIdx =
        LIS->InsertMachineInstrInMaps(*NewCopyMI).getRegSlot();
    // The new def starts out dead; it is extended to its real live range
    // by extendToIndices() below once the old copy has been pruned.
    VNInfo *VNI = IntB.getNextValue(NewCopyIdx, LIS->getVNInfoAllocator());
    IntB.createDeadDef(VNI);
  } else {
    DEBUG(dbgs() << "\tremovePartialRedundancy: Remove the copy from BB#"
                 << MBB.getNumber() << '\t' << CopyMI);
  }

  // Remove CopyMI: prune B's value defined by it (collecting the live-range
  // end points so liveness can be reconstructed), then erase the instruction.
  SmallVector<SlotIndex, 8> EndPoints;
  VNInfo *BValNo = IntB.Query(CopyIdx.getRegSlot()).valueOutOrDead();
  LIS->pruneValue(IntB, CopyIdx.getRegSlot(), &EndPoints);
  BValNo->markUnused();
  LIS->RemoveMachineInstrFromMaps(CopyMI);
  CopyMI.eraseFromParent();

  // Extend IntB to the EndPoints of its original live interval, so the
  // value from the hoisted copy now covers what the removed copy covered.
  LIS->extendToIndices(IntB, EndPoints);

  // A may have fewer (or no) remaining uses after the copy removal.
  shrinkToUses(&IntA);
  return true;
}
1028+
8641029
/// Returns true if @p MI defines the full vreg @p Reg, as opposed to just
8651030
/// defining a subregister.
8661031
static bool definesFullReg(const MachineInstr &MI, unsigned Reg) {
@@ -1486,6 +1651,12 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
14861651
}
14871652
}
14881653

1654+
// Try and see if we can partially eliminate the copy by moving the copy to
1655+
// its predecessor.
1656+
if (!CP.isPartial() && !CP.isPhys())
1657+
if (removePartialRedundancy(CP, *CopyMI))
1658+
return true;
1659+
14891660
// Otherwise, we are unable to join the intervals.
14901661
DEBUG(dbgs() << "\tInterference!\n");
14911662
Again = true; // May be possible to coalesce later.

0 commit comments

Comments
 (0)