@@ -414,7 +414,7 @@ namespace {
414
414
415
415
// Check the branch targets are within range and we satisfy our
416
416
// restrictions.
417
- void CheckLegality (ARMBasicBlockUtils *BBUtils);
417
+ void Validate (ARMBasicBlockUtils *BBUtils);
418
418
419
419
bool FoundAllComponents () const {
420
420
return Start && Dec && End;
@@ -520,41 +520,20 @@ std::map<MachineInstr *,
520
520
INITIALIZE_PASS (ARMLowOverheadLoops, DEBUG_TYPE, ARM_LOW_OVERHEAD_LOOPS_NAME,
521
521
false , false )
522
522
523
- MachineInstr *LowOverheadLoop::isSafeToDefineLR() {
524
- // We can define LR because LR already contains the same value.
525
- if (Start->getOperand (0 ).getReg () == ARM::LR)
526
- return Start;
527
-
528
- unsigned CountReg = Start->getOperand (0 ).getReg ();
529
- auto IsMoveLR = [&CountReg](MachineInstr *MI) {
530
- return MI->getOpcode () == ARM::tMOVr &&
531
- MI->getOperand (0 ).getReg () == ARM::LR &&
532
- MI->getOperand (1 ).getReg () == CountReg &&
533
- MI->getOperand (2 ).getImm () == ARMCC::AL;
534
- };
535
-
536
- MachineBasicBlock *MBB = Start->getParent ();
537
-
538
- // Find an insertion point:
539
- // - Is there a (mov lr, Count) before Start? If so, and nothing else writes
540
- // to Count before Start, we can insert at that mov.
541
- if (auto *LRDef = RDA.getUniqueReachingMIDef (Start, ARM::LR))
542
- if (IsMoveLR (LRDef) && RDA.hasSameReachingDef (Start, LRDef, CountReg))
543
- return LRDef;
544
-
545
- // - Is there a (mov lr, Count) after Start? If so, and nothing else writes
546
- // to Count after Start, we can insert at that mov.
547
- if (auto *LRDef = RDA.getLocalLiveOutMIDef (MBB, ARM::LR))
548
- if (IsMoveLR (LRDef) && RDA.hasSameReachingDef (Start, LRDef, CountReg))
549
- return LRDef;
550
-
551
- // We've found no suitable LR def and Start doesn't use LR directly. Can we
552
- // just define LR anyway?
553
- return RDA.isSafeToDefRegAt (Start, ARM::LR) ? Start : nullptr ;
554
- }
555
-
556
523
bool LowOverheadLoop::ValidateTailPredicate(MachineInstr *StartInsertPt) {
524
+ if (!StartInsertPt)
525
+ return false ;
526
+
527
+ if (!IsTailPredicationLegal ()) {
528
+ LLVM_DEBUG (if (VCTPs.empty ())
529
+ dbgs () << " ARM Loops: Didn't find a VCTP instruction.\n " ;
530
+ dbgs () << " ARM Loops: Tail-predication is not valid.\n " );
531
+ return false ;
532
+ }
533
+
557
534
assert (!VCTPs.empty () && " VCTP instruction expected but is not set" );
535
+ assert (ML.getBlocks ().size () == 1 &&
536
+ " Shouldn't be processing a loop with more than one block" );
558
537
559
538
if (DisableTailPredication) {
560
539
LLVM_DEBUG (dbgs () << " ARM Loops: tail-predication is disabled\n " );
@@ -631,15 +610,15 @@ bool LowOverheadLoop::ValidateTailPredicate(MachineInstr *StartInsertPt) {
631
610
// width, the Loop Start instruction will immediately generate one or more
632
611
false lane masks which can, incorrectly, affect the subsequent MVE
633
612
// instructions in the preheader.
634
- auto cannotInsertWDLSTPBetween = [](MachineBasicBlock::iterator I,
613
+ auto CannotInsertWDLSTPBetween = [](MachineBasicBlock::iterator I,
635
614
MachineBasicBlock::iterator E) {
636
615
for (; I != E; ++I)
637
616
if (shouldInspect (*I))
638
617
return true ;
639
618
return false ;
640
619
};
641
620
642
- if (cannotInsertWDLSTPBetween (StartInsertPt, InsertBB->end ()))
621
+ if (CannotInsertWDLSTPBetween (StartInsertPt, InsertBB->end ()))
643
622
return false ;
644
623
645
624
// Especially in the case of while loops, InsertBB may not be the
@@ -658,15 +637,9 @@ bool LowOverheadLoop::ValidateTailPredicate(MachineInstr *StartInsertPt) {
658
637
return false ;
659
638
};
660
639
661
- // First, find the block that looks like the preheader .
640
+ // Search backwards for a def, until we get to InsertBB .
662
641
MachineBasicBlock *MBB = Preheader;
663
- if (!MBB) {
664
- LLVM_DEBUG (dbgs () << " ARM Loops: Didn't find preheader.\n " );
665
- return false ;
666
- }
667
-
668
- // Then search backwards for a def, until we get to InsertBB.
669
- while (MBB != InsertBB) {
642
+ while (MBB && MBB != InsertBB) {
670
643
if (CannotProvideElements (MBB, NumElements)) {
671
644
LLVM_DEBUG (dbgs () << " ARM Loops: Unable to provide element count.\n " );
672
645
return false ;
@@ -944,59 +917,83 @@ bool LowOverheadLoop::ValidateLiveOuts() {
944
917
return true ;
945
918
}
946
919
947
- void LowOverheadLoop::CheckLegality (ARMBasicBlockUtils *BBUtils) {
920
+ void LowOverheadLoop::Validate (ARMBasicBlockUtils *BBUtils) {
948
921
if (Revert)
949
922
return ;
950
923
951
- if (!End->getOperand (1 ).isMBB ())
952
- report_fatal_error (" Expected LoopEnd to target basic block" );
924
+ auto ValidateRanges = [this , &BBUtils]() {
925
+ if (!End->getOperand (1 ).isMBB ())
926
+ report_fatal_error (" Expected LoopEnd to target basic block" );
953
927
954
- // TODO Maybe there's cases where the target doesn't have to be the header,
955
- // but for now be safe and revert.
956
- if (End->getOperand (1 ).getMBB () != ML.getHeader ()) {
957
- LLVM_DEBUG (dbgs () << " ARM Loops: LoopEnd is not targetting header.\n " );
958
- Revert = true ;
959
- return ;
960
- }
961
-
962
- // The WLS and LE instructions have 12-bits for the label offset. WLS
963
- // requires a positive offset, while LE uses negative.
964
- if (BBUtils->getOffsetOf (End) < BBUtils->getOffsetOf (ML.getHeader ()) ||
965
- !BBUtils->isBBInRange (End, ML.getHeader (), 4094 )) {
966
- LLVM_DEBUG (dbgs () << " ARM Loops: LE offset is out-of-range\n " );
967
- Revert = true ;
968
- return ;
969
- }
928
+ // TODO Maybe there are cases where the target need not be the header,
929
+ // but for now be safe and revert.
930
+ if (End->getOperand (1 ).getMBB () != ML.getHeader ()) {
931
+ LLVM_DEBUG (dbgs () << " ARM Loops: LoopEnd is not targetting header.\n " );
932
+ return false ;
933
+ }
970
934
971
- if (Start->getOpcode () == ARM::t2WhileLoopStart &&
972
- (BBUtils->getOffsetOf (Start) >
973
- BBUtils->getOffsetOf (Start->getOperand (1 ).getMBB ()) ||
974
- !BBUtils->isBBInRange (Start, Start->getOperand (1 ).getMBB (), 4094 ))) {
975
- LLVM_DEBUG (dbgs () << " ARM Loops: WLS offset is out-of-range!\n " );
976
- Revert = true ;
977
- return ;
978
- }
935
+ // The WLS and LE instructions have 12 bits for the label offset. WLS
936
+ // requires a positive offset, while LE uses negative.
937
+ if (BBUtils->getOffsetOf (End) < BBUtils->getOffsetOf (ML.getHeader ()) ||
938
+ !BBUtils->isBBInRange (End, ML.getHeader (), 4094 )) {
939
+ LLVM_DEBUG (dbgs () << " ARM Loops: LE offset is out-of-range\n " );
940
+ return false ;
941
+ }
979
942
980
- InsertPt = Revert ? nullptr : isSafeToDefineLR ();
981
- if (!InsertPt) {
982
- LLVM_DEBUG (dbgs () << " ARM Loops: Unable to find safe insertion point.\n " );
983
- Revert = true ;
984
- return ;
985
- } else
986
- LLVM_DEBUG (dbgs () << " ARM Loops: Start insertion point: " << *InsertPt);
943
+ if (Start->getOpcode () == ARM::t2WhileLoopStart &&
944
+ (BBUtils->getOffsetOf (Start) >
945
+ BBUtils->getOffsetOf (Start->getOperand (1 ).getMBB ()) ||
946
+ !BBUtils->isBBInRange (Start, Start->getOperand (1 ).getMBB (), 4094 ))) {
947
+ LLVM_DEBUG (dbgs () << " ARM Loops: WLS offset is out-of-range!\n " );
948
+ return false ;
949
+ }
950
+ return true ;
951
+ };
987
952
988
- if (!IsTailPredicationLegal ()) {
989
- LLVM_DEBUG (if (VCTPs.empty ())
990
- dbgs () << " ARM Loops: Didn't find a VCTP instruction.\n " ;
991
- dbgs () << " ARM Loops: Tail-predication is not valid.\n " );
992
- return ;
993
- }
953
+ auto FindStartInsertionPoint = [this ]() -> MachineInstr* {
954
+ // We can define LR because LR already contains the same value.
955
+ if (Start->getOperand (0 ).getReg () == ARM::LR)
956
+ return Start;
957
+
958
+ unsigned CountReg = Start->getOperand (0 ).getReg ();
959
+ auto IsMoveLR = [&CountReg](MachineInstr *MI) {
960
+ return MI->getOpcode () == ARM::tMOVr &&
961
+ MI->getOperand (0 ).getReg () == ARM::LR &&
962
+ MI->getOperand (1 ).getReg () == CountReg &&
963
+ MI->getOperand (2 ).getImm () == ARMCC::AL;
964
+ };
965
+
966
+ MachineBasicBlock *MBB = Start->getParent ();
967
+
968
+ // Find an insertion point:
969
+ // - Is there a (mov lr, Count) before Start? If so, and nothing else
970
+ // writes to Count before Start, we can insert at that mov.
971
+ if (auto *LRDef = RDA.getUniqueReachingMIDef (Start, ARM::LR))
972
+ if (IsMoveLR (LRDef) && RDA.hasSameReachingDef (Start, LRDef, CountReg))
973
+ return LRDef;
974
+
975
+ // - Is there a (mov lr, Count) after Start? If so, and nothing else writes
976
+ // to Count after Start, we can insert at that mov.
977
+ if (auto *LRDef = RDA.getLocalLiveOutMIDef (MBB, ARM::LR))
978
+ if (IsMoveLR (LRDef) && RDA.hasSameReachingDef (Start, LRDef, CountReg))
979
+ return LRDef;
980
+
981
+ // We've found no suitable LR def and Start doesn't use LR directly. Can we
982
+ // just define LR anyway?
983
+ return RDA.isSafeToDefRegAt (Start, ARM::LR) ? Start : nullptr ;
984
+ };
994
985
995
- assert (ML. getBlocks (). size () == 1 &&
996
- " Shouldn't be processing a loop with more than one block " ) ;
986
+ InsertPt = FindStartInsertionPoint ();
987
+ Revert = ! ValidateRanges () || !InsertPt ;
997
988
CannotTailPredicate = !ValidateTailPredicate (InsertPt);
998
- LLVM_DEBUG (if (CannotTailPredicate)
999
- dbgs () << " ARM Loops: Couldn't validate tail predicate.\n " );
989
+
990
+ LLVM_DEBUG (if (!InsertPt)
991
+ dbgs () << " ARM Loops: Unable to find safe insertion point.\n " ;
992
+ else
993
+ dbgs () << " ARM Loops: Start insertion point: " << *InsertPt;
994
+ if (CannotTailPredicate)
995
+ dbgs () << " ARM Loops: Couldn't validate tail predicate.\n "
996
+ );
1000
997
}
1001
998
1002
999
bool LowOverheadLoop::AddVCTP (MachineInstr *MI) {
@@ -1206,7 +1203,7 @@ bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) {
1206
1203
LLVM_DEBUG (dbgs () << " ARM Loops: Unable to remove LoopDec.\n " );
1207
1204
LoLoop.Revert = true ;
1208
1205
}
1209
- LoLoop.CheckLegality (BBUtils.get ());
1206
+ LoLoop.Validate (BBUtils.get ());
1210
1207
Expand (LoLoop);
1211
1208
return true ;
1212
1209
}
0 commit comments