Skip to content

Commit e82a008

Browse files
committed
[ARM][LowOverheadLoops] Cleanup and re-arrange
Rename and reorganise how we decide where to put the LoopStart instruction.
1 parent 509fba7 commit e82a008

File tree

1 file changed

+86
-89
lines changed

1 file changed

+86
-89
lines changed

llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp

Lines changed: 86 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -414,7 +414,7 @@ namespace {
414414

415415
// Check the branch targets are within range and we satisfy our
416416
// restrictions.
417-
void CheckLegality(ARMBasicBlockUtils *BBUtils);
417+
void Validate(ARMBasicBlockUtils *BBUtils);
418418

419419
bool FoundAllComponents() const {
420420
return Start && Dec && End;
@@ -520,41 +520,20 @@ std::map<MachineInstr *,
520520
INITIALIZE_PASS(ARMLowOverheadLoops, DEBUG_TYPE, ARM_LOW_OVERHEAD_LOOPS_NAME,
521521
false, false)
522522

523-
MachineInstr *LowOverheadLoop::isSafeToDefineLR() {
524-
// We can define LR because LR already contains the same value.
525-
if (Start->getOperand(0).getReg() == ARM::LR)
526-
return Start;
527-
528-
unsigned CountReg = Start->getOperand(0).getReg();
529-
auto IsMoveLR = [&CountReg](MachineInstr *MI) {
530-
return MI->getOpcode() == ARM::tMOVr &&
531-
MI->getOperand(0).getReg() == ARM::LR &&
532-
MI->getOperand(1).getReg() == CountReg &&
533-
MI->getOperand(2).getImm() == ARMCC::AL;
534-
};
535-
536-
MachineBasicBlock *MBB = Start->getParent();
537-
538-
// Find an insertion point:
539-
// - Is there a (mov lr, Count) before Start? If so, and nothing else writes
540-
// to Count before Start, we can insert at that mov.
541-
if (auto *LRDef = RDA.getUniqueReachingMIDef(Start, ARM::LR))
542-
if (IsMoveLR(LRDef) && RDA.hasSameReachingDef(Start, LRDef, CountReg))
543-
return LRDef;
544-
545-
// - Is there a (mov lr, Count) after Start? If so, and nothing else writes
546-
// to Count after Start, we can insert at that mov.
547-
if (auto *LRDef = RDA.getLocalLiveOutMIDef(MBB, ARM::LR))
548-
if (IsMoveLR(LRDef) && RDA.hasSameReachingDef(Start, LRDef, CountReg))
549-
return LRDef;
550-
551-
// We've found no suitable LR def and Start doesn't use LR directly. Can we
552-
// just define LR anyway?
553-
return RDA.isSafeToDefRegAt(Start, ARM::LR) ? Start : nullptr;
554-
}
555-
556523
bool LowOverheadLoop::ValidateTailPredicate(MachineInstr *StartInsertPt) {
524+
if (!StartInsertPt)
525+
return false;
526+
527+
if (!IsTailPredicationLegal()) {
528+
LLVM_DEBUG(if (VCTPs.empty())
529+
dbgs() << "ARM Loops: Didn't find a VCTP instruction.\n";
530+
dbgs() << "ARM Loops: Tail-predication is not valid.\n");
531+
return false;
532+
}
533+
557534
assert(!VCTPs.empty() && "VCTP instruction expected but is not set");
535+
assert(ML.getBlocks().size() == 1 &&
536+
"Shouldn't be processing a loop with more than one block");
558537

559538
if (DisableTailPredication) {
560539
LLVM_DEBUG(dbgs() << "ARM Loops: tail-predication is disabled\n");
@@ -631,15 +610,15 @@ bool LowOverheadLoop::ValidateTailPredicate(MachineInstr *StartInsertPt) {
631610
// width, the Loop Start instruction will immediately generate one or more
632611
false lane mask which can, incorrectly, affect the subsequent MVE
633612
// instructions in the preheader.
634-
auto cannotInsertWDLSTPBetween = [](MachineBasicBlock::iterator I,
613+
auto CannotInsertWDLSTPBetween = [](MachineBasicBlock::iterator I,
635614
MachineBasicBlock::iterator E) {
636615
for (; I != E; ++I)
637616
if (shouldInspect(*I))
638617
return true;
639618
return false;
640619
};
641620

642-
if (cannotInsertWDLSTPBetween(StartInsertPt, InsertBB->end()))
621+
if (CannotInsertWDLSTPBetween(StartInsertPt, InsertBB->end()))
643622
return false;
644623

645624
// Especially in the case of while loops, InsertBB may not be the
@@ -658,15 +637,9 @@ bool LowOverheadLoop::ValidateTailPredicate(MachineInstr *StartInsertPt) {
658637
return false;
659638
};
660639

661-
// First, find the block that looks like the preheader.
640+
// Search backwards for a def, until we get to InsertBB.
662641
MachineBasicBlock *MBB = Preheader;
663-
if (!MBB) {
664-
LLVM_DEBUG(dbgs() << "ARM Loops: Didn't find preheader.\n");
665-
return false;
666-
}
667-
668-
// Then search backwards for a def, until we get to InsertBB.
669-
while (MBB != InsertBB) {
642+
while (MBB && MBB != InsertBB) {
670643
if (CannotProvideElements(MBB, NumElements)) {
671644
LLVM_DEBUG(dbgs() << "ARM Loops: Unable to provide element count.\n");
672645
return false;
@@ -944,59 +917,83 @@ bool LowOverheadLoop::ValidateLiveOuts() {
944917
return true;
945918
}
946919

947-
void LowOverheadLoop::CheckLegality(ARMBasicBlockUtils *BBUtils) {
920+
void LowOverheadLoop::Validate(ARMBasicBlockUtils *BBUtils) {
948921
if (Revert)
949922
return;
950923

951-
if (!End->getOperand(1).isMBB())
952-
report_fatal_error("Expected LoopEnd to target basic block");
924+
auto ValidateRanges = [this, &BBUtils]() {
925+
if (!End->getOperand(1).isMBB())
926+
report_fatal_error("Expected LoopEnd to target basic block");
953927

954-
// TODO Maybe there are cases where the target doesn't have to be the header,
955-
// but for now be safe and revert.
956-
if (End->getOperand(1).getMBB() != ML.getHeader()) {
957-
LLVM_DEBUG(dbgs() << "ARM Loops: LoopEnd is not targetting header.\n");
958-
Revert = true;
959-
return;
960-
}
961-
962-
// The WLS and LE instructions have 12-bits for the label offset. WLS
963-
// requires a positive offset, while LE uses negative.
964-
if (BBUtils->getOffsetOf(End) < BBUtils->getOffsetOf(ML.getHeader()) ||
965-
!BBUtils->isBBInRange(End, ML.getHeader(), 4094)) {
966-
LLVM_DEBUG(dbgs() << "ARM Loops: LE offset is out-of-range\n");
967-
Revert = true;
968-
return;
969-
}
928+
// TODO Maybe there are cases where the target doesn't have to be the header,
929+
// but for now be safe and revert.
930+
if (End->getOperand(1).getMBB() != ML.getHeader()) {
931+
LLVM_DEBUG(dbgs() << "ARM Loops: LoopEnd is not targetting header.\n");
932+
return false;
933+
}
970934

971-
if (Start->getOpcode() == ARM::t2WhileLoopStart &&
972-
(BBUtils->getOffsetOf(Start) >
973-
BBUtils->getOffsetOf(Start->getOperand(1).getMBB()) ||
974-
!BBUtils->isBBInRange(Start, Start->getOperand(1).getMBB(), 4094))) {
975-
LLVM_DEBUG(dbgs() << "ARM Loops: WLS offset is out-of-range!\n");
976-
Revert = true;
977-
return;
978-
}
935+
// The WLS and LE instructions have 12-bits for the label offset. WLS
936+
// requires a positive offset, while LE uses negative.
937+
if (BBUtils->getOffsetOf(End) < BBUtils->getOffsetOf(ML.getHeader()) ||
938+
!BBUtils->isBBInRange(End, ML.getHeader(), 4094)) {
939+
LLVM_DEBUG(dbgs() << "ARM Loops: LE offset is out-of-range\n");
940+
return false;
941+
}
979942

980-
InsertPt = Revert ? nullptr : isSafeToDefineLR();
981-
if (!InsertPt) {
982-
LLVM_DEBUG(dbgs() << "ARM Loops: Unable to find safe insertion point.\n");
983-
Revert = true;
984-
return;
985-
} else
986-
LLVM_DEBUG(dbgs() << "ARM Loops: Start insertion point: " << *InsertPt);
943+
if (Start->getOpcode() == ARM::t2WhileLoopStart &&
944+
(BBUtils->getOffsetOf(Start) >
945+
BBUtils->getOffsetOf(Start->getOperand(1).getMBB()) ||
946+
!BBUtils->isBBInRange(Start, Start->getOperand(1).getMBB(), 4094))) {
947+
LLVM_DEBUG(dbgs() << "ARM Loops: WLS offset is out-of-range!\n");
948+
return false;
949+
}
950+
return true;
951+
};
987952

988-
if (!IsTailPredicationLegal()) {
989-
LLVM_DEBUG(if (VCTPs.empty())
990-
dbgs() << "ARM Loops: Didn't find a VCTP instruction.\n";
991-
dbgs() << "ARM Loops: Tail-predication is not valid.\n");
992-
return;
993-
}
953+
auto FindStartInsertionPoint = [this]() -> MachineInstr* {
954+
// We can define LR because LR already contains the same value.
955+
if (Start->getOperand(0).getReg() == ARM::LR)
956+
return Start;
957+
958+
unsigned CountReg = Start->getOperand(0).getReg();
959+
auto IsMoveLR = [&CountReg](MachineInstr *MI) {
960+
return MI->getOpcode() == ARM::tMOVr &&
961+
MI->getOperand(0).getReg() == ARM::LR &&
962+
MI->getOperand(1).getReg() == CountReg &&
963+
MI->getOperand(2).getImm() == ARMCC::AL;
964+
};
965+
966+
MachineBasicBlock *MBB = Start->getParent();
967+
968+
// Find an insertion point:
969+
// - Is there a (mov lr, Count) before Start? If so, and nothing else
970+
// writes to Count before Start, we can insert at that mov.
971+
if (auto *LRDef = RDA.getUniqueReachingMIDef(Start, ARM::LR))
972+
if (IsMoveLR(LRDef) && RDA.hasSameReachingDef(Start, LRDef, CountReg))
973+
return LRDef;
974+
975+
// - Is there a (mov lr, Count) after Start? If so, and nothing else writes
976+
// to Count after Start, we can insert at that mov.
977+
if (auto *LRDef = RDA.getLocalLiveOutMIDef(MBB, ARM::LR))
978+
if (IsMoveLR(LRDef) && RDA.hasSameReachingDef(Start, LRDef, CountReg))
979+
return LRDef;
980+
981+
// We've found no suitable LR def and Start doesn't use LR directly. Can we
982+
// just define LR anyway?
983+
return RDA.isSafeToDefRegAt(Start, ARM::LR) ? Start : nullptr;
984+
};
994985

995-
assert(ML.getBlocks().size() == 1 &&
996-
"Shouldn't be processing a loop with more than one block");
986+
InsertPt = FindStartInsertionPoint();
987+
Revert = !ValidateRanges() || !InsertPt;
997988
CannotTailPredicate = !ValidateTailPredicate(InsertPt);
998-
LLVM_DEBUG(if (CannotTailPredicate)
999-
dbgs() << "ARM Loops: Couldn't validate tail predicate.\n");
989+
990+
LLVM_DEBUG(if (!InsertPt)
991+
dbgs() << "ARM Loops: Unable to find safe insertion point.\n";
992+
else
993+
dbgs() << "ARM Loops: Start insertion point: " << *InsertPt;
994+
if (CannotTailPredicate)
995+
dbgs() << "ARM Loops: Couldn't validate tail predicate.\n"
996+
);
1000997
}
1001998

1002999
bool LowOverheadLoop::AddVCTP(MachineInstr *MI) {
@@ -1206,7 +1203,7 @@ bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) {
12061203
LLVM_DEBUG(dbgs() << "ARM Loops: Unable to remove LoopDec.\n");
12071204
LoLoop.Revert = true;
12081205
}
1209-
LoLoop.CheckLegality(BBUtils.get());
1206+
LoLoop.Validate(BBUtils.get());
12101207
Expand(LoLoop);
12111208
return true;
12121209
}

0 commit comments

Comments
 (0)