Skip to content

Commit 82e535b

Browse files
authored
Merge pull request #7720 from fhahn/revert-shrinkwrap
Revert Shrinkwrap Changes
2 parents 0f671e7 + d51f1d5 commit 82e535b

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

59 files changed

+1360
-1401
lines changed

llvm/lib/CodeGen/ShrinkWrap.cpp

Lines changed: 38 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,6 @@
5353
#include "llvm/ADT/SmallVector.h"
5454
#include "llvm/ADT/Statistic.h"
5555
#include "llvm/Analysis/CFG.h"
56-
#include "llvm/Analysis/ValueTracking.h"
5756
#include "llvm/CodeGen/MachineBasicBlock.h"
5857
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
5958
#include "llvm/CodeGen/MachineDominators.h"
@@ -161,16 +160,10 @@ class ShrinkWrap : public MachineFunctionPass {
161160
/// Current MachineFunction.
162161
MachineFunction *MachineFunc = nullptr;
163162

164-
/// Is `true` for block numbers where we can guarantee no stack access
165-
/// or computation of stack-relative addresses on any CFG path including
166-
/// the block itself.
167-
BitVector StackAddressUsedBlockInfo;
168-
169163
/// Check if \p MI uses or defines a callee-saved register or
170164
/// a frame index. If this is the case, this means \p MI must happen
171165
/// after Save and before Restore.
172-
bool useOrDefCSROrFI(const MachineInstr &MI, RegScavenger *RS,
173-
bool StackAddressUsed) const;
166+
bool useOrDefCSROrFI(const MachineInstr &MI, RegScavenger *RS) const;
174167

175168
const SetOfRegs &getCurrentCSRs(RegScavenger *RS) const {
176169
if (CurrentCSRs.empty()) {
@@ -196,9 +189,7 @@ class ShrinkWrap : public MachineFunctionPass {
196189

197190
// Try to find safe point based on dominance and block frequency without
198191
// any change in IR.
199-
bool performShrinkWrapping(
200-
const ReversePostOrderTraversal<MachineBasicBlock *> &RPOT,
201-
RegScavenger *RS);
192+
bool performShrinkWrapping(MachineFunction &MF, RegScavenger *RS);
202193

203194
/// This function tries to split the restore point if doing so can shrink the
204195
/// save point further. \return True if restore point is split.
@@ -293,32 +284,9 @@ INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
293284
INITIALIZE_PASS_DEPENDENCY(MachineOptimizationRemarkEmitterPass)
294285
INITIALIZE_PASS_END(ShrinkWrap, DEBUG_TYPE, "Shrink Wrap Pass", false, false)
295286

296-
bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI, RegScavenger *RS,
297-
bool StackAddressUsed) const {
298-
/// Check if \p Op is known to access an address not on the function's stack.
299-
/// At the moment, accesses where the underlying object is a global, function
300-
/// argument, or jump table are considered non-stack accesses. Note that the
301-
/// caller's stack may get accessed when passing an argument via the stack,
302-
/// but not the stack of the current function.
303-
///
304-
auto IsKnownNonStackPtr = [](MachineMemOperand *Op) {
305-
if (Op->getValue()) {
306-
const Value *UO = getUnderlyingObject(Op->getValue());
307-
if (!UO)
308-
return false;
309-
if (auto *Arg = dyn_cast<Argument>(UO))
310-
return !Arg->hasPassPointeeByValueCopyAttr();
311-
return isa<GlobalValue>(UO);
312-
}
313-
if (const PseudoSourceValue *PSV = Op->getPseudoValue())
314-
return PSV->isJumpTable();
315-
return false;
316-
};
317-
// Load/store operations may access the stack indirectly when we previously
318-
// computed an address to a stack location.
319-
if (StackAddressUsed && MI.mayLoadOrStore() &&
320-
(MI.isCall() || MI.hasUnmodeledSideEffects() || MI.memoperands_empty() ||
321-
!all_of(MI.memoperands(), IsKnownNonStackPtr)))
287+
bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI, RegScavenger *RS
288+
) const {
289+
if (MI.mayLoadOrStore())
322290
return true;
323291

324292
if (MI.getOpcode() == FrameSetupOpcode ||
@@ -558,7 +526,7 @@ bool ShrinkWrap::checkIfRestoreSplittable(
558526
SmallVectorImpl<MachineBasicBlock *> &CleanPreds,
559527
const TargetInstrInfo *TII, RegScavenger *RS) {
560528
for (const MachineInstr &MI : *CurRestore)
561-
if (useOrDefCSROrFI(MI, RS, /*StackAddressUsed=*/true))
529+
if (useOrDefCSROrFI(MI, RS))
562530
return false;
563531

564532
for (MachineBasicBlock *PredBB : CurRestore->predecessors()) {
@@ -618,7 +586,7 @@ bool ShrinkWrap::postShrinkWrapping(bool HasCandidate, MachineFunction &MF,
618586
continue;
619587
}
620588
for (const MachineInstr &MI : MBB)
621-
if (useOrDefCSROrFI(MI, RS, /*StackAddressUsed=*/true)) {
589+
if (useOrDefCSROrFI(MI, RS)) {
622590
DirtyBBs.insert(&MBB);
623591
break;
624592
}
@@ -705,7 +673,7 @@ void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB,
705673
// terminator.
706674
if (Restore == &MBB) {
707675
for (const MachineInstr &Terminator : MBB.terminators()) {
708-
if (!useOrDefCSROrFI(Terminator, RS, /*StackAddressUsed=*/true))
676+
if (!useOrDefCSROrFI(Terminator, RS))
709677
continue;
710678
// One of the terminators needs to happen before the restore point.
711679
if (MBB.succ_empty()) {
@@ -812,62 +780,46 @@ static bool giveUpWithRemarks(MachineOptimizationRemarkEmitter *ORE,
812780
return false;
813781
}
814782

815-
bool ShrinkWrap::performShrinkWrapping(
816-
const ReversePostOrderTraversal<MachineBasicBlock *> &RPOT,
817-
RegScavenger *RS) {
818-
for (MachineBasicBlock *MBB : RPOT) {
819-
LLVM_DEBUG(dbgs() << "Look into: " << printMBBReference(*MBB) << '\n');
783+
bool ShrinkWrap::performShrinkWrapping(MachineFunction &MF, RegScavenger *RS) {
784+
for (MachineBasicBlock &MBB : MF) {
785+
LLVM_DEBUG(dbgs() << "Look into: " << MBB.getNumber() << ' '
786+
<< MBB.getName() << '\n');
820787

821-
if (MBB->isEHFuncletEntry())
788+
if (MBB.isEHFuncletEntry())
822789
return giveUpWithRemarks(ORE, "UnsupportedEHFunclets",
823790
"EH Funclets are not supported yet.",
824-
MBB->front().getDebugLoc(), MBB);
791+
MBB.front().getDebugLoc(), &MBB);
825792

826-
if (MBB->isEHPad() || MBB->isInlineAsmBrIndirectTarget()) {
793+
if (MBB.isEHPad() || MBB.isInlineAsmBrIndirectTarget()) {
827794
// Push the prologue and epilogue outside of the region that may throw (or
828795
// jump out via inlineasm_br), by making sure that all the landing pads
829796
// are at least at the boundary of the save and restore points. The
830797
// problem is that a basic block can jump out from the middle in these
831798
// cases, which we do not handle.
832-
updateSaveRestorePoints(*MBB, RS);
799+
updateSaveRestorePoints(MBB, RS);
833800
if (!ArePointsInteresting()) {
834801
LLVM_DEBUG(dbgs() << "EHPad/inlineasm_br prevents shrink-wrapping\n");
835802
return false;
836803
}
837804
continue;
838805
}
839806

840-
bool StackAddressUsed = false;
841-
// Check if we found any stack accesses in the predecessors. We are not
842-
// doing a full dataflow analysis here to keep things simple but just
843-
// rely on a reverse postorder traversal (RPOT) to guarantee predecessors
844-
// are already processed except for loops (and accept the conservative
845-
// result for loops).
846-
for (const MachineBasicBlock *Pred : MBB->predecessors()) {
847-
if (StackAddressUsedBlockInfo.test(Pred->getNumber())) {
848-
StackAddressUsed = true;
849-
break;
850-
}
851-
}
852-
853-
for (const MachineInstr &MI : *MBB) {
854-
if (useOrDefCSROrFI(MI, RS, StackAddressUsed)) {
855-
// Save (resp. restore) point must dominate (resp. post dominate)
856-
// MI. Look for the proper basic block for those.
857-
updateSaveRestorePoints(*MBB, RS);
858-
// If we are at a point where we cannot improve the placement of
859-
// save/restore instructions, just give up.
860-
if (!ArePointsInteresting()) {
861-
LLVM_DEBUG(dbgs() << "No Shrink wrap candidate found\n");
862-
return false;
863-
}
864-
// No need to look for other instructions, this basic block
865-
// will already be part of the handled region.
866-
StackAddressUsed = true;
867-
break;
807+
for (const MachineInstr &MI : MBB) {
808+
if (!useOrDefCSROrFI(MI, RS))
809+
continue;
810+
// Save (resp. restore) point must dominate (resp. post dominate)
811+
// MI. Look for the proper basic block for those.
812+
updateSaveRestorePoints(MBB, RS);
813+
// If we are at a point where we cannot improve the placement of
814+
// save/restore instructions, just give up.
815+
if (!ArePointsInteresting()) {
816+
LLVM_DEBUG(dbgs() << "No Shrink wrap candidate found\n");
817+
return false;
868818
}
819+
// No need to look for other instructions, this basic block
820+
// will already be part of the handled region.
821+
break;
869822
}
870-
StackAddressUsedBlockInfo[MBB->getNumber()] = StackAddressUsed;
871823
}
872824
if (!ArePointsInteresting()) {
873825
// If the points are not interesting at this point, then they must be null
@@ -881,13 +833,13 @@ bool ShrinkWrap::performShrinkWrapping(
881833
LLVM_DEBUG(dbgs() << "\n ** Results **\nFrequency of the Entry: " << EntryFreq
882834
<< '\n');
883835

884-
const TargetFrameLowering *TFI =
885-
MachineFunc->getSubtarget().getFrameLowering();
836+
const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
886837
do {
887838
LLVM_DEBUG(dbgs() << "Shrink wrap candidates (#, Name, Freq):\nSave: "
888-
<< printMBBReference(*Save) << ' '
839+
<< Save->getNumber() << ' ' << Save->getName() << ' '
889840
<< MBFI->getBlockFreq(Save).getFrequency()
890-
<< "\nRestore: " << printMBBReference(*Restore) << ' '
841+
<< "\nRestore: " << Restore->getNumber() << ' '
842+
<< Restore->getName() << ' '
891843
<< MBFI->getBlockFreq(Restore).getFrequency() << '\n');
892844

893845
bool IsSaveCheap, TargetCanUseSaveAsPrologue = false;
@@ -948,18 +900,17 @@ bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) {
948900

949901
bool Changed = false;
950902

951-
StackAddressUsedBlockInfo.resize(MF.getNumBlockIDs(), true);
952-
bool HasCandidate = performShrinkWrapping(RPOT, RS.get());
953-
StackAddressUsedBlockInfo.clear();
903+
bool HasCandidate = performShrinkWrapping(MF, RS.get());
954904
Changed = postShrinkWrapping(HasCandidate, MF, RS.get());
955905
if (!HasCandidate && !Changed)
956906
return false;
957907
if (!ArePointsInteresting())
958908
return Changed;
959909

960910
LLVM_DEBUG(dbgs() << "Final shrink wrap candidates:\nSave: "
961-
<< printMBBReference(*Save) << ' '
962-
<< "\nRestore: " << printMBBReference(*Restore) << '\n');
911+
<< Save->getNumber() << ' ' << Save->getName()
912+
<< "\nRestore: " << Restore->getNumber() << ' '
913+
<< Restore->getName() << '\n');
963914

964915
MachineFrameInfo &MFI = MF.getFrameInfo();
965916
MFI.setSavePoint(Save);

llvm/test/CodeGen/AArch64/addsub.ll

Lines changed: 25 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -232,7 +232,7 @@ define i32 @sub_two_parts_imm_i32_neg(i32 %a) {
232232
define i32 @add_27962026(i32 %a) {
233233
; CHECK-LABEL: add_27962026:
234234
; CHECK: // %bb.0:
235-
; CHECK-NEXT: mov w8, #43690
235+
; CHECK-NEXT: mov w8, #43690 // =0xaaaa
236236
; CHECK-NEXT: movk w8, #426, lsl #16
237237
; CHECK-NEXT: add w0, w0, w8
238238
; CHECK-NEXT: ret
@@ -243,7 +243,7 @@ define i32 @add_27962026(i32 %a) {
243243
define i32 @add_65534(i32 %a) {
244244
; CHECK-LABEL: add_65534:
245245
; CHECK: // %bb.0:
246-
; CHECK-NEXT: mov w8, #65534
246+
; CHECK-NEXT: mov w8, #65534 // =0xfffe
247247
; CHECK-NEXT: add w0, w0, w8
248248
; CHECK-NEXT: ret
249249
%b = add i32 %a, 65534
@@ -259,7 +259,7 @@ define void @add_in_loop(i32 %0) {
259259
; CHECK-NEXT: .cfi_def_cfa_offset 16
260260
; CHECK-NEXT: .cfi_offset w19, -8
261261
; CHECK-NEXT: .cfi_offset w30, -16
262-
; CHECK-NEXT: mov w19, #43690
262+
; CHECK-NEXT: mov w19, #43690 // =0xaaaa
263263
; CHECK-NEXT: movk w19, #170, lsl #16
264264
; CHECK-NEXT: .LBB15_1: // =>This Inner Loop Header: Depth=1
265265
; CHECK-NEXT: add w0, w0, w19
@@ -373,7 +373,7 @@ declare {i8, i1} @llvm.uadd.with.overflow.i8(i8 %a, i8 %b)
373373
define i1 @uadd_add(i8 %a, i8 %b, ptr %p) {
374374
; CHECK-LABEL: uadd_add:
375375
; CHECK: // %bb.0:
376-
; CHECK-NEXT: mov w8, #255
376+
; CHECK-NEXT: mov w8, #255 // =0xff
377377
; CHECK-NEXT: bic w8, w8, w0
378378
; CHECK-NEXT: add w8, w8, w1, uxtb
379379
; CHECK-NEXT: lsr w0, w8, #8
@@ -398,7 +398,7 @@ define i1 @uadd_add(i8 %a, i8 %b, ptr %p) {
398398
define i64 @addl_0x80000000(i64 %a) {
399399
; CHECK-LABEL: addl_0x80000000:
400400
; CHECK: // %bb.0:
401-
; CHECK-NEXT: mov w8, #48576
401+
; CHECK-NEXT: mov w8, #48576 // =0xbdc0
402402
; CHECK-NEXT: movk w8, #65520, lsl #16
403403
; CHECK-NEXT: add x0, x0, x8
404404
; CHECK-NEXT: ret
@@ -499,7 +499,7 @@ define i1 @ne_ln(i64 %0) {
499499
define i1 @reject_eq(i32 %0) {
500500
; CHECK-LABEL: reject_eq:
501501
; CHECK: // %bb.0:
502-
; CHECK-NEXT: mov w8, #51712
502+
; CHECK-NEXT: mov w8, #51712 // =0xca00
503503
; CHECK-NEXT: movk w8, #15258, lsl #16
504504
; CHECK-NEXT: cmp w0, w8
505505
; CHECK-NEXT: cset w0, eq
@@ -511,7 +511,7 @@ define i1 @reject_eq(i32 %0) {
511511
define i1 @reject_non_eqne_csinc(i32 %0) {
512512
; CHECK-LABEL: reject_non_eqne_csinc:
513513
; CHECK: // %bb.0:
514-
; CHECK-NEXT: mov w8, #4369
514+
; CHECK-NEXT: mov w8, #4369 // =0x1111
515515
; CHECK-NEXT: movk w8, #17, lsl #16
516516
; CHECK-NEXT: cmp w0, w8
517517
; CHECK-NEXT: cset w0, lo
@@ -524,9 +524,9 @@ define i32 @accept_csel(i32 %0) {
524524
; CHECK-LABEL: accept_csel:
525525
; CHECK: // %bb.0:
526526
; CHECK-NEXT: sub w9, w0, #273, lsl #12 // =1118208
527-
; CHECK-NEXT: mov w8, #17
527+
; CHECK-NEXT: mov w8, #17 // =0x11
528528
; CHECK-NEXT: cmp w9, #273
529-
; CHECK-NEXT: mov w9, #11
529+
; CHECK-NEXT: mov w9, #11 // =0xb
530530
; CHECK-NEXT: csel w0, w9, w8, eq
531531
; CHECK-NEXT: ret
532532
%2 = icmp eq i32 %0, 1118481
@@ -537,11 +537,11 @@ define i32 @accept_csel(i32 %0) {
537537
define i32 @reject_non_eqne_csel(i32 %0) {
538538
; CHECK-LABEL: reject_non_eqne_csel:
539539
; CHECK: // %bb.0:
540-
; CHECK-NEXT: mov w8, #4369
541-
; CHECK-NEXT: mov w9, #11
540+
; CHECK-NEXT: mov w8, #4369 // =0x1111
541+
; CHECK-NEXT: mov w9, #11 // =0xb
542542
; CHECK-NEXT: movk w8, #17, lsl #16
543543
; CHECK-NEXT: cmp w0, w8
544-
; CHECK-NEXT: mov w8, #17
544+
; CHECK-NEXT: mov w8, #17 // =0x11
545545
; CHECK-NEXT: csel w0, w9, w8, lo
546546
; CHECK-NEXT: ret
547547
%2 = icmp ult i32 %0, 1118481
@@ -573,7 +573,7 @@ define void @accept_branch(i32 %0) {
573573
define void @reject_non_eqne_branch(i32 %0) {
574574
; CHECK-LABEL: reject_non_eqne_branch:
575575
; CHECK: // %bb.0:
576-
; CHECK-NEXT: mov w8, #13398
576+
; CHECK-NEXT: mov w8, #13398 // =0x3456
577577
; CHECK-NEXT: movk w8, #18, lsl #16
578578
; CHECK-NEXT: cmp w0, w8
579579
; CHECK-NEXT: b.le .LBB33_2
@@ -593,20 +593,20 @@ define void @reject_non_eqne_branch(i32 %0) {
593593
define i32 @reject_multiple_usages(i32 %0) {
594594
; CHECK-LABEL: reject_multiple_usages:
595595
; CHECK: // %bb.0:
596-
; CHECK-NEXT: mov w8, #4369
597-
; CHECK-NEXT: mov w9, #3
596+
; CHECK-NEXT: mov w8, #4369 // =0x1111
597+
; CHECK-NEXT: mov w9, #3 // =0x3
598598
; CHECK-NEXT: movk w8, #17, lsl #16
599-
; CHECK-NEXT: mov w10, #17
599+
; CHECK-NEXT: mov w10, #17 // =0x11
600600
; CHECK-NEXT: cmp w0, w8
601-
; CHECK-NEXT: mov w8, #9
602-
; CHECK-NEXT: mov w11, #12
601+
; CHECK-NEXT: mov w8, #9 // =0x9
602+
; CHECK-NEXT: mov w11, #12 // =0xc
603603
; CHECK-NEXT: csel w8, w8, w9, eq
604604
; CHECK-NEXT: csel w9, w11, w10, hi
605605
; CHECK-NEXT: add w8, w8, w9
606-
; CHECK-NEXT: mov w9, #53312
606+
; CHECK-NEXT: mov w9, #53312 // =0xd040
607607
; CHECK-NEXT: movk w9, #2, lsl #16
608608
; CHECK-NEXT: cmp w0, w9
609-
; CHECK-NEXT: mov w9, #26304
609+
; CHECK-NEXT: mov w9, #26304 // =0x66c0
610610
; CHECK-NEXT: movk w9, #1433, lsl #16
611611
; CHECK-NEXT: csel w0, w8, w9, hi
612612
; CHECK-NEXT: ret
@@ -651,6 +651,9 @@ declare dso_local i32 @crng_reseed(...) local_unnamed_addr
651651
define dso_local i32 @_extract_crng_crng() {
652652
; CHECK-LABEL: _extract_crng_crng:
653653
; CHECK: // %bb.0: // %entry
654+
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
655+
; CHECK-NEXT: .cfi_def_cfa_offset 16
656+
; CHECK-NEXT: .cfi_offset w30, -16
654657
; CHECK-NEXT: adrp x8, _extract_crng_crng
655658
; CHECK-NEXT: add x8, x8, :lo12:_extract_crng_crng
656659
; CHECK-NEXT: tbnz x8, #63, .LBB36_2
@@ -662,18 +665,15 @@ define dso_local i32 @_extract_crng_crng() {
662665
; CHECK-NEXT: cmn x8, #1272
663666
; CHECK-NEXT: b.pl .LBB36_3
664667
; CHECK-NEXT: .LBB36_2: // %if.then
665-
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
666-
; CHECK-NEXT: .cfi_def_cfa_offset 16
667-
; CHECK-NEXT: .cfi_offset w30, -16
668668
; CHECK-NEXT: adrp x8, primary_crng
669669
; CHECK-NEXT: adrp x9, input_pool
670670
; CHECK-NEXT: add x9, x9, :lo12:input_pool
671671
; CHECK-NEXT: ldr w8, [x8, :lo12:primary_crng]
672672
; CHECK-NEXT: cmp w8, #0
673673
; CHECK-NEXT: csel x0, xzr, x9, eq
674674
; CHECK-NEXT: bl crng_reseed
675-
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
676675
; CHECK-NEXT: .LBB36_3: // %if.end
676+
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
677677
; CHECK-NEXT: ret
678678
entry:
679679
br i1 icmp slt (ptr @_extract_crng_crng, ptr null), label %if.then, label %lor.lhs.false
@@ -778,7 +778,7 @@ define i32 @commute_subop0_zext(i16 %x, i32 %y, i32 %z) {
778778
define i8 @commute_subop0_anyext(i16 %a, i16 %b, i32 %c) {
779779
; CHECK-LABEL: commute_subop0_anyext:
780780
; CHECK: // %bb.0:
781-
; CHECK-NEXT: mov w8, #111
781+
; CHECK-NEXT: mov w8, #111 // =0x6f
782782
; CHECK-NEXT: sub w9, w2, w1
783783
; CHECK-NEXT: madd w8, w0, w8, w9
784784
; CHECK-NEXT: lsl w8, w8, #3

0 commit comments

Comments
 (0)