|
22 | 22 | #include "llvm/CodeGen/LiveRangeEdit.h"
|
23 | 23 | #include "llvm/CodeGen/MachineFrameInfo.h"
|
24 | 24 | #include "llvm/CodeGen/MachineInstr.h"
|
| 25 | +#include "llvm/CodeGen/MachineInstrBuilder.h" |
25 | 26 | #include "llvm/CodeGen/MachineLoopInfo.h"
|
26 | 27 | #include "llvm/CodeGen/MachineRegisterInfo.h"
|
27 | 28 | #include "llvm/CodeGen/Passes.h"
|
@@ -189,6 +190,9 @@ namespace {
|
189 | 190 | /// This returns true if an interval was modified.
|
190 | 191 | bool removeCopyByCommutingDef(const CoalescerPair &CP,MachineInstr *CopyMI);
|
191 | 192 |
|
| 193 | + /// We found a copy which can be moved to its less frequent predecessor. |
| 194 | + bool removePartialRedundancy(const CoalescerPair &CP, MachineInstr &CopyMI); |
| 195 | + |
192 | 196 | /// If the source of a copy is defined by a
|
193 | 197 | /// trivial computation, replace the copy by rematerialize the definition.
|
194 | 198 | bool reMaterializeTrivialDef(const CoalescerPair &CP, MachineInstr *CopyMI,
|
@@ -861,6 +865,167 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
|
861 | 865 | return true;
|
862 | 866 | }
|
863 | 867 |
|
/// For copy B = A in BB2, if A is defined by A = B in BB0 which is a
/// predecessor of BB2, and if B is not redefined on the way from A = B
/// in BB0 to B = A in BB2, B = A in BB2 is partially redundant if the
/// execution goes through the path from BB0 to BB2. We may move B = A
/// to the predecessor without such reversed copy.
/// So we will transform the program from:
///   BB0:
///      A = B;    BB1:
///       ...       ...
///        /  \    /
///          BB2:
///           ...
///           B = A;
///
/// to:
///
///   BB0:        BB1:
///      A = B;    ...
///       ...      B = A;
///        /  \    /
///          BB2:
///           ...
///
/// A special case is when BB0 and BB2 are the same BB which is the only
/// BB in a loop:
///   BB1:
///        ...
///   BB0/BB2:  ----
///        B = A;   |
///        ...      |
///        A = B;   |
///          |-------
///          |
/// We may hoist B = A from BB0/BB2 to BB1.
///
/// The major preconditions for correctness to remove such partial
/// redundancy include:
/// 1. A in B = A in BB2 is defined by a PHI in BB2, and one operand of
///    the PHI is defined by the reversed copy A = B in BB0.
/// 2. No B is referenced from the start of BB2 to B = A.
/// 3. No B is defined from A = B to the end of BB0.
/// 4. BB1 has only one successor.
///
/// 2 and 4 implicitly ensure B is not live at the end of BB1.
/// 4 guarantees BB2 is hotter than BB1, so we can only move a copy to a
/// colder place, which not only prevent endless loop, but also make sure
/// the movement of copy is beneficial.
///
/// \param CP     the coalescer pair describing the copy's src/dst vregs
///               (must be virtual-only; asserted below).
/// \param CopyMI the copy instruction B = A under consideration.
/// \returns true if CopyMI was removed (and possibly re-inserted in a
///          colder predecessor) and the live intervals were updated.
bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP,
                                                MachineInstr &CopyMI) {
  assert(!CP.isPhys());
  // Only full-register copies are handled; subregister copies would need
  // subrange-aware updates below.
  if (!CopyMI.isFullCopy())
    return false;

  MachineBasicBlock &MBB = *CopyMI.getParent();
  if (MBB.isEHPad())
    return false;

  // The transformation pattern requires exactly the two-predecessor shape
  // shown above (one pred with the reverse copy, one to receive the moved
  // copy).
  if (MBB.pred_size() != 2)
    return false;

  // Normalize so IntA is the copy's source interval and IntB its dest,
  // regardless of which direction the CoalescerPair was flipped.
  LiveInterval &IntA =
      LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
  LiveInterval &IntB =
      LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());

  // A is defined by PHI at the entry of MBB.
  // getRegSlot(true) queries the early (use) slot of the copy, i.e. the
  // value A holds coming into CopyMI.
  SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(true);
  VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx);
  assert(AValNo && !AValNo->isUnused() && "COPY source not live");
  if (!AValNo->isPHIDef())
    return false;

  // No B is referenced before CopyMI in MBB (precondition 2 above); this
  // also implies B is not live-in to MBB, so removing the copy cannot
  // break an earlier use of B.
  if (IntB.overlaps(LIS->getMBBStartIdx(&MBB), CopyIdx))
    return false;

  // MBB has two predecessors: one contains A = B so no copy will be inserted
  // for it. The other one will have a copy moved from MBB.
  bool FoundReverseCopy = false;
  // Predecessor that still needs a copy of B = A (nullptr if every
  // predecessor already carries the reverse copy A = B).
  MachineBasicBlock *CopyLeftBB = nullptr;
  for (MachineBasicBlock *Pred : MBB.predecessors()) {
    // Value of A flowing out of Pred into the PHI. Since AValNo is a PHI
    // def at MBB entry, A is live-out of each predecessor here.
    VNInfo *PVal = IntA.getVNInfoBefore(LIS->getMBBEndIdx(Pred));
    MachineInstr *DefMI = LIS->getInstructionFromIndex(PVal->def);
    if (!DefMI || !DefMI->isFullCopy()) {
      CopyLeftBB = Pred;
      continue;
    }
    // Check DefMI is a reverse copy and it is in BB Pred.
    if (DefMI->getOperand(0).getReg() != IntA.reg ||
        DefMI->getOperand(1).getReg() != IntB.reg ||
        DefMI->getParent() != Pred) {
      CopyLeftBB = Pred;
      continue;
    }
    // If there is any other def of B after DefMI and before the end of Pred,
    // we need to keep the copy of B = A at the end of Pred if we remove
    // B = A from MBB.
    bool ValB_Changed = false;
    for (auto VNI : IntB.valnos) {
      if (VNI->isUnused())
        continue;
      // A value of B defined strictly between the reverse copy and the end
      // of Pred would be clobbered if we dropped B = A (precondition 3).
      if (PVal->def < VNI->def && VNI->def < LIS->getMBBEndIdx(Pred)) {
        ValB_Changed = true;
        break;
      }
    }
    if (ValB_Changed) {
      CopyLeftBB = Pred;
      continue;
    }
    FoundReverseCopy = true;
  }

  // If no reverse copy is found in predecessors, nothing to do.
  if (!FoundReverseCopy)
    return false;

  // If CopyLeftBB is nullptr, it means every predecessor of MBB contains
  // reverse copy, CopyMI can be removed trivially if only IntA/IntB is updated.
  // If CopyLeftBB is not nullptr, move CopyMI from MBB to CopyLeftBB and
  // update IntA/IntB.
  //
  // If CopyLeftBB is not nullptr, ensure CopyLeftBB has a single succ so
  // MBB is hotter than CopyLeftBB.
  if (CopyLeftBB && CopyLeftBB->succ_size() > 1)
    return false;

  // Now ok to move copy.
  if (CopyLeftBB) {
    DEBUG(dbgs() << "\tremovePartialRedundancy: Move the copy to BB#"
                 << CopyLeftBB->getNumber() << '\t' << CopyMI);

    // Insert new copy to CopyLeftBB, just before any terminators so B is
    // defined on the edge into MBB.
    auto InsPos = CopyLeftBB->getFirstTerminator();
    MachineInstr *NewCopyMI = BuildMI(*CopyLeftBB, InsPos, CopyMI.getDebugLoc(),
                                      TII->get(TargetOpcode::COPY), IntB.reg)
                                  .addReg(IntA.reg);
    SlotIndex NewCopyIdx =
        LIS->InsertMachineInstrInMaps(*NewCopyMI).getRegSlot();
    // Record the new def as a dead def for now; extendToIndices below (via
    // the pruned EndPoints) re-extends IntB over the ranges that still need
    // the value.
    VNInfo *VNI = IntB.getNextValue(NewCopyIdx, LIS->getVNInfoAllocator());
    IntB.createDeadDef(VNI);
  } else {
    DEBUG(dbgs() << "\tremovePartialRedundancy: Remove the copy from BB#"
                 << MBB.getNumber() << '\t' << CopyMI);
  }

  // Remove CopyMI.
  // Prune the value defined by CopyMI out of IntB, collecting in EndPoints
  // the indices up to which B was live so the interval can be rebuilt from
  // the new def(s) afterwards.
  SmallVector<SlotIndex, 8> EndPoints;
  VNInfo *BValNo = IntB.Query(CopyIdx.getRegSlot()).valueOutOrDead();
  LIS->pruneValue(IntB, CopyIdx.getRegSlot(), &EndPoints);
  BValNo->markUnused();
  LIS->RemoveMachineInstrFromMaps(CopyMI);
  CopyMI.eraseFromParent();

  // Extend IntB to the EndPoints of its original live interval.
  LIS->extendToIndices(IntB, EndPoints);

  // A lost a use (the erased copy); recompute its live range from its
  // remaining uses.
  shrinkToUses(&IntA);
  return true;
}
| 1028 | + |
864 | 1029 | /// Returns true if @p MI defines the full vreg @p Reg, as opposed to just
|
865 | 1030 | /// defining a subregister.
|
866 | 1031 | static bool definesFullReg(const MachineInstr &MI, unsigned Reg) {
|
@@ -1486,6 +1651,12 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
|
1486 | 1651 | }
|
1487 | 1652 | }
|
1488 | 1653 |
|
| 1654 | + // Try and see if we can partially eliminate the copy by moving the copy to |
| 1655 | + // its predecessor. |
| 1656 | + if (!CP.isPartial() && !CP.isPhys()) |
| 1657 | + if (removePartialRedundancy(CP, *CopyMI)) |
| 1658 | + return true; |
| 1659 | + |
1489 | 1660 | // Otherwise, we are unable to join the intervals.
|
1490 | 1661 | DEBUG(dbgs() << "\tInterference!\n");
|
1491 | 1662 | Again = true; // May be possible to coalesce later.
|
|
0 commit comments