Revert shrinkwrap changes #7720

Merged · 4 commits · Nov 2, 2023

125 changes: 38 additions & 87 deletions llvm/lib/CodeGen/ShrinkWrap.cpp
@@ -53,7 +53,6 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
@@ -161,16 +160,10 @@ class ShrinkWrap : public MachineFunctionPass {
/// Current MachineFunction.
MachineFunction *MachineFunc = nullptr;

/// Is `true` for block numbers where we can guarantee no stack access
/// or computation of stack-relative addresses on any CFG path including
/// the block itself.
BitVector StackAddressUsedBlockInfo;

/// Check if \p MI uses or defines a callee-saved register or
/// a frame index. If this is the case, this means \p MI must happen
/// after Save and before Restore.
bool useOrDefCSROrFI(const MachineInstr &MI, RegScavenger *RS,
bool StackAddressUsed) const;
bool useOrDefCSROrFI(const MachineInstr &MI, RegScavenger *RS) const;

const SetOfRegs &getCurrentCSRs(RegScavenger *RS) const {
if (CurrentCSRs.empty()) {
@@ -196,9 +189,7 @@ class ShrinkWrap : public MachineFunctionPass {

// Try to find safe point based on dominance and block frequency without
// any change in IR.
bool performShrinkWrapping(
const ReversePostOrderTraversal<MachineBasicBlock *> &RPOT,
RegScavenger *RS);
bool performShrinkWrapping(MachineFunction &MF, RegScavenger *RS);

/// This function tries to split the restore point if doing so can shrink the
/// save point further. \return True if restore point is split.
@@ -293,32 +284,9 @@ INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_DEPENDENCY(MachineOptimizationRemarkEmitterPass)
INITIALIZE_PASS_END(ShrinkWrap, DEBUG_TYPE, "Shrink Wrap Pass", false, false)

bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI, RegScavenger *RS,
bool StackAddressUsed) const {
/// Check if \p Op is known to access an address not on the function's stack.
/// At the moment, accesses where the underlying object is a global, function
/// argument, or jump table are considered non-stack accesses. Note that the
/// caller's stack may get accessed when passing an argument via the stack,
/// but not the stack of the current function.
///
auto IsKnownNonStackPtr = [](MachineMemOperand *Op) {
if (Op->getValue()) {
const Value *UO = getUnderlyingObject(Op->getValue());
if (!UO)
return false;
if (auto *Arg = dyn_cast<Argument>(UO))
return !Arg->hasPassPointeeByValueCopyAttr();
return isa<GlobalValue>(UO);
}
if (const PseudoSourceValue *PSV = Op->getPseudoValue())
return PSV->isJumpTable();
return false;
};
// Load/store operations may access the stack indirectly when we previously
// computed an address to a stack location.
if (StackAddressUsed && MI.mayLoadOrStore() &&
(MI.isCall() || MI.hasUnmodeledSideEffects() || MI.memoperands_empty() ||
!all_of(MI.memoperands(), IsKnownNonStackPtr)))
bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI,
                                 RegScavenger *RS) const {
if (MI.mayLoadOrStore())
return true;

if (MI.getOpcode() == FrameSetupOpcode ||
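
This hunk is the core of the revert: with the refined memory-operand analysis gone, useOrDefCSROrFI is conservative again and any instruction that may load or store pins the save/restore placement. A minimal before/after sketch of the predicate (illustrative only; both function names are hypothetical, and the pre-revert variant abbreviates the deleted IsKnownNonStackPtr lambda):

#include "llvm/CodeGen/MachineInstr.h"
using namespace llvm;

// Illustrative only: after the revert, every load/store is assumed to be a
// potential stack access, so it blocks sinking the save point past it.
static bool mayAccessStackAfterRevert(const MachineInstr &MI) {
  return MI.mayLoadOrStore();
}

// Illustrative only: before the revert, a load/store mattered only once a
// stack address could have escaped, and memory operands provably rooted in
// a global, a jump table, or a non-byval argument were exempt.
static bool mayAccessStackBeforeRevert(const MachineInstr &MI,
                                       bool StackAddressUsed) {
  if (!MI.mayLoadOrStore())
    return false;
  return StackAddressUsed &&
         (MI.isCall() || MI.hasUnmodeledSideEffects() ||
          MI.memoperands_empty() /* || !all_of(memoperands,
                                               IsKnownNonStackPtr) */);
}
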
@@ -558,7 +526,7 @@ bool ShrinkWrap::checkIfRestoreSplittable(
SmallVectorImpl<MachineBasicBlock *> &CleanPreds,
const TargetInstrInfo *TII, RegScavenger *RS) {
for (const MachineInstr &MI : *CurRestore)
if (useOrDefCSROrFI(MI, RS, /*StackAddressUsed=*/true))
if (useOrDefCSROrFI(MI, RS))
return false;

for (MachineBasicBlock *PredBB : CurRestore->predecessors()) {
@@ -618,7 +586,7 @@ bool ShrinkWrap::postShrinkWrapping(bool HasCandidate, MachineFunction &MF,
continue;
}
for (const MachineInstr &MI : MBB)
if (useOrDefCSROrFI(MI, RS, /*StackAddressUsed=*/true)) {
if (useOrDefCSROrFI(MI, RS)) {
DirtyBBs.insert(&MBB);
break;
}
@@ -705,7 +673,7 @@ void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB,
// terminator.
if (Restore == &MBB) {
for (const MachineInstr &Terminator : MBB.terminators()) {
if (!useOrDefCSROrFI(Terminator, RS, /*StackAddressUsed=*/true))
if (!useOrDefCSROrFI(Terminator, RS))
continue;
// One of the terminators needs to happen before the restore point.
if (MBB.succ_empty()) {
@@ -812,62 +780,46 @@ static bool giveUpWithRemarks(MachineOptimizationRemarkEmitter *ORE,
return false;
}

bool ShrinkWrap::performShrinkWrapping(
const ReversePostOrderTraversal<MachineBasicBlock *> &RPOT,
RegScavenger *RS) {
for (MachineBasicBlock *MBB : RPOT) {
LLVM_DEBUG(dbgs() << "Look into: " << printMBBReference(*MBB) << '\n');
bool ShrinkWrap::performShrinkWrapping(MachineFunction &MF, RegScavenger *RS) {
for (MachineBasicBlock &MBB : MF) {
LLVM_DEBUG(dbgs() << "Look into: " << MBB.getNumber() << ' '
<< MBB.getName() << '\n');

if (MBB->isEHFuncletEntry())
if (MBB.isEHFuncletEntry())
return giveUpWithRemarks(ORE, "UnsupportedEHFunclets",
"EH Funclets are not supported yet.",
MBB->front().getDebugLoc(), MBB);
MBB.front().getDebugLoc(), &MBB);

if (MBB->isEHPad() || MBB->isInlineAsmBrIndirectTarget()) {
if (MBB.isEHPad() || MBB.isInlineAsmBrIndirectTarget()) {
// Push the prologue and epilogue outside of the region that may throw (or
// jump out via inlineasm_br), by making sure that all the landing pads
// are at least at the boundary of the save and restore points. The
// problem is that a basic block can jump out from the middle in these
// cases, which we do not handle.
updateSaveRestorePoints(*MBB, RS);
updateSaveRestorePoints(MBB, RS);
if (!ArePointsInteresting()) {
LLVM_DEBUG(dbgs() << "EHPad/inlineasm_br prevents shrink-wrapping\n");
return false;
}
continue;
}

bool StackAddressUsed = false;
// Check if we found any stack accesses in the predecessors. We are not
// doing a full dataflow analysis here to keep things simple but just
// rely on a reverse post-order traversal (RPOT) to guarantee predecessors
// are already processed except for loops (and accept the conservative
// result for loops).
for (const MachineBasicBlock *Pred : MBB->predecessors()) {
if (StackAddressUsedBlockInfo.test(Pred->getNumber())) {
StackAddressUsed = true;
break;
}
}

for (const MachineInstr &MI : *MBB) {
if (useOrDefCSROrFI(MI, RS, StackAddressUsed)) {
// Save (resp. restore) point must dominate (resp. post dominate)
// MI. Look for the proper basic block for those.
updateSaveRestorePoints(*MBB, RS);
// If we are at a point where we cannot improve the placement of
// save/restore instructions, just give up.
if (!ArePointsInteresting()) {
LLVM_DEBUG(dbgs() << "No Shrink wrap candidate found\n");
return false;
}
// No need to look for other instructions, this basic block
// will already be part of the handled region.
StackAddressUsed = true;
break;
for (const MachineInstr &MI : MBB) {
if (!useOrDefCSROrFI(MI, RS))
continue;
// Save (resp. restore) point must dominate (resp. post dominate)
// MI. Look for the proper basic block for those.
updateSaveRestorePoints(MBB, RS);
// If we are at a point where we cannot improve the placement of
// save/restore instructions, just give up.
if (!ArePointsInteresting()) {
LLVM_DEBUG(dbgs() << "No Shrink wrap candidate found\n");
return false;
}
// No need to look for other instructions, this basic block
// will already be part of the handled region.
break;
}
StackAddressUsedBlockInfo[MBB->getNumber()] = StackAddressUsed;
}
if (!ArePointsInteresting()) {
// If the points are not interesting at this point, then they must be null
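
For context on the bookkeeping being deleted in this hunk: the removed comment describes a single forward pass in reverse post-order rather than a fixed-point dataflow. StackAddressUsedBlockInfo started out all-true (see the removed resize call in runOnMachineFunction below), so a predecessor that has not been visited yet, which can only happen across a loop back edge, reads as "stack address used" and loops get the conservative answer. A self-contained sketch of that one-pass propagation on a generic CFG (hypothetical helper, not LLVM code):

#include <vector>

// One forward pass in reverse post-order (RPOT): every non-back-edge
// predecessor is final by the time its successor is visited. Bits start
// true, so loop back edges conservatively read as "stack address used".
static std::vector<bool>
propagateStackUse(const std::vector<std::vector<int>> &Preds, // preds of B
                  const std::vector<int> &RPOT,  // block ids in RPO order
                  const std::vector<bool> &UsesStackItself) {
  std::vector<bool> Used(Preds.size(), true); // conservative default
  for (int B : RPOT) {
    bool FromPreds = false;
    for (int P : Preds[B])
      FromPreds = FromPreds || Used[P];
    Used[B] = FromPreds || UsesStackItself[B];
  }
  return Used;
}
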
@@ -881,13 +833,13 @@ bool ShrinkWrap::performShrinkWrapping(
LLVM_DEBUG(dbgs() << "\n ** Results **\nFrequency of the Entry: " << EntryFreq
<< '\n');

const TargetFrameLowering *TFI =
MachineFunc->getSubtarget().getFrameLowering();
const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
do {
LLVM_DEBUG(dbgs() << "Shrink wrap candidates (#, Name, Freq):\nSave: "
<< printMBBReference(*Save) << ' '
<< Save->getNumber() << ' ' << Save->getName() << ' '
<< MBFI->getBlockFreq(Save).getFrequency()
<< "\nRestore: " << printMBBReference(*Restore) << ' '
<< "\nRestore: " << Restore->getNumber() << ' '
<< Restore->getName() << ' '
<< MBFI->getBlockFreq(Restore).getFrequency() << '\n');

bool IsSaveCheap, TargetCanUseSaveAsPrologue = false;
@@ -948,18 +900,17 @@ bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) {

bool Changed = false;

StackAddressUsedBlockInfo.resize(MF.getNumBlockIDs(), true);
bool HasCandidate = performShrinkWrapping(RPOT, RS.get());
StackAddressUsedBlockInfo.clear();
bool HasCandidate = performShrinkWrapping(MF, RS.get());
Changed = postShrinkWrapping(HasCandidate, MF, RS.get());
if (!HasCandidate && !Changed)
return false;
if (!ArePointsInteresting())
return Changed;

LLVM_DEBUG(dbgs() << "Final shrink wrap candidates:\nSave: "
<< printMBBReference(*Save) << ' '
<< "\nRestore: " << printMBBReference(*Restore) << '\n');
<< Save->getNumber() << ' ' << Save->getName()
<< "\nRestore: " << Restore->getNumber() << ' '
<< Restore->getName() << '\n');

MachineFrameInfo &MFI = MF.getFrameInfo();
MFI.setSavePoint(Save);
50 changes: 25 additions & 25 deletions llvm/test/CodeGen/AArch64/addsub.ll
@@ -232,7 +232,7 @@ define i32 @sub_two_parts_imm_i32_neg(i32 %a) {
define i32 @add_27962026(i32 %a) {
; CHECK-LABEL: add_27962026:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #43690
; CHECK-NEXT: mov w8, #43690 // =0xaaaa
; CHECK-NEXT: movk w8, #426, lsl #16
; CHECK-NEXT: add w0, w0, w8
; CHECK-NEXT: ret
@@ -243,7 +243,7 @@ define i32 @add_27962026(i32 %a) {
define i32 @add_65534(i32 %a) {
; CHECK-LABEL: add_65534:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #65534
; CHECK-NEXT: mov w8, #65534 // =0xfffe
; CHECK-NEXT: add w0, w0, w8
; CHECK-NEXT: ret
%b = add i32 %a, 65534
@@ -259,7 +259,7 @@ define void @add_in_loop(i32 %0) {
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w19, -8
; CHECK-NEXT: .cfi_offset w30, -16
; CHECK-NEXT: mov w19, #43690
; CHECK-NEXT: mov w19, #43690 // =0xaaaa
; CHECK-NEXT: movk w19, #170, lsl #16
; CHECK-NEXT: .LBB15_1: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: add w0, w0, w19
@@ -373,7 +373,7 @@ declare {i8, i1} @llvm.uadd.with.overflow.i8(i8 %a, i8 %b)
define i1 @uadd_add(i8 %a, i8 %b, ptr %p) {
; CHECK-LABEL: uadd_add:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #255
; CHECK-NEXT: mov w8, #255 // =0xff
; CHECK-NEXT: bic w8, w8, w0
; CHECK-NEXT: add w8, w8, w1, uxtb
; CHECK-NEXT: lsr w0, w8, #8
@@ -398,7 +398,7 @@ define i1 @uadd_add(i8 %a, i8 %b, ptr %p) {
define i64 @addl_0x80000000(i64 %a) {
; CHECK-LABEL: addl_0x80000000:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #48576
; CHECK-NEXT: mov w8, #48576 // =0xbdc0
; CHECK-NEXT: movk w8, #65520, lsl #16
; CHECK-NEXT: add x0, x0, x8
; CHECK-NEXT: ret
@@ -499,7 +499,7 @@ define i1 @ne_ln(i64 %0) {
define i1 @reject_eq(i32 %0) {
; CHECK-LABEL: reject_eq:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #51712
; CHECK-NEXT: mov w8, #51712 // =0xca00
; CHECK-NEXT: movk w8, #15258, lsl #16
; CHECK-NEXT: cmp w0, w8
; CHECK-NEXT: cset w0, eq
@@ -511,7 +511,7 @@ define i1 @reject_eq(i32 %0) {
define i1 @reject_non_eqne_csinc(i32 %0) {
; CHECK-LABEL: reject_non_eqne_csinc:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #4369
; CHECK-NEXT: mov w8, #4369 // =0x1111
; CHECK-NEXT: movk w8, #17, lsl #16
; CHECK-NEXT: cmp w0, w8
; CHECK-NEXT: cset w0, lo
@@ -524,9 +524,9 @@ define i32 @accept_csel(i32 %0) {
; CHECK-LABEL: accept_csel:
; CHECK: // %bb.0:
; CHECK-NEXT: sub w9, w0, #273, lsl #12 // =1118208
; CHECK-NEXT: mov w8, #17
; CHECK-NEXT: mov w8, #17 // =0x11
; CHECK-NEXT: cmp w9, #273
; CHECK-NEXT: mov w9, #11
; CHECK-NEXT: mov w9, #11 // =0xb
; CHECK-NEXT: csel w0, w9, w8, eq
; CHECK-NEXT: ret
%2 = icmp eq i32 %0, 1118481
@@ -537,11 +537,11 @@ define i32 @reject_non_eqne_csel(i32 %0) {
define i32 @reject_non_eqne_csel(i32 %0) {
; CHECK-LABEL: reject_non_eqne_csel:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #4369
; CHECK-NEXT: mov w9, #11
; CHECK-NEXT: mov w8, #4369 // =0x1111
; CHECK-NEXT: mov w9, #11 // =0xb
; CHECK-NEXT: movk w8, #17, lsl #16
; CHECK-NEXT: cmp w0, w8
; CHECK-NEXT: mov w8, #17
; CHECK-NEXT: mov w8, #17 // =0x11
; CHECK-NEXT: csel w0, w9, w8, lo
; CHECK-NEXT: ret
%2 = icmp ult i32 %0, 1118481
@@ -573,7 +573,7 @@ define void @accept_branch(i32 %0) {
define void @reject_non_eqne_branch(i32 %0) {
; CHECK-LABEL: reject_non_eqne_branch:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #13398
; CHECK-NEXT: mov w8, #13398 // =0x3456
; CHECK-NEXT: movk w8, #18, lsl #16
; CHECK-NEXT: cmp w0, w8
; CHECK-NEXT: b.le .LBB33_2
@@ -593,20 +593,20 @@ define void @reject_non_eqne_branch(i32 %0) {
define i32 @reject_multiple_usages(i32 %0) {
; CHECK-LABEL: reject_multiple_usages:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #4369
; CHECK-NEXT: mov w9, #3
; CHECK-NEXT: mov w8, #4369 // =0x1111
; CHECK-NEXT: mov w9, #3 // =0x3
; CHECK-NEXT: movk w8, #17, lsl #16
; CHECK-NEXT: mov w10, #17
; CHECK-NEXT: mov w10, #17 // =0x11
; CHECK-NEXT: cmp w0, w8
; CHECK-NEXT: mov w8, #9
; CHECK-NEXT: mov w11, #12
; CHECK-NEXT: mov w8, #9 // =0x9
; CHECK-NEXT: mov w11, #12 // =0xc
; CHECK-NEXT: csel w8, w8, w9, eq
; CHECK-NEXT: csel w9, w11, w10, hi
; CHECK-NEXT: add w8, w8, w9
; CHECK-NEXT: mov w9, #53312
; CHECK-NEXT: mov w9, #53312 // =0xd040
; CHECK-NEXT: movk w9, #2, lsl #16
; CHECK-NEXT: cmp w0, w9
; CHECK-NEXT: mov w9, #26304
; CHECK-NEXT: mov w9, #26304 // =0x66c0
; CHECK-NEXT: movk w9, #1433, lsl #16
; CHECK-NEXT: csel w0, w8, w9, hi
; CHECK-NEXT: ret
@@ -651,6 +651,9 @@ declare dso_local i32 @crng_reseed(...) local_unnamed_addr
define dso_local i32 @_extract_crng_crng() {
; CHECK-LABEL: _extract_crng_crng:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w30, -16
; CHECK-NEXT: adrp x8, _extract_crng_crng
; CHECK-NEXT: add x8, x8, :lo12:_extract_crng_crng
; CHECK-NEXT: tbnz x8, #63, .LBB36_2
Expand All @@ -662,18 +665,15 @@ define dso_local i32 @_extract_crng_crng() {
; CHECK-NEXT: cmn x8, #1272
; CHECK-NEXT: b.pl .LBB36_3
; CHECK-NEXT: .LBB36_2: // %if.then
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w30, -16
; CHECK-NEXT: adrp x8, primary_crng
; CHECK-NEXT: adrp x9, input_pool
; CHECK-NEXT: add x9, x9, :lo12:input_pool
; CHECK-NEXT: ldr w8, [x8, :lo12:primary_crng]
; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: csel x0, xzr, x9, eq
; CHECK-NEXT: bl crng_reseed
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: .LBB36_3: // %if.end
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
entry:
br i1 icmp slt (ptr @_extract_crng_crng, ptr null), label %if.then, label %lor.lhs.false
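
The cost accepted by the revert is visible in the CHECK changes above: the str x30 spill and its matching reload move out of the cold %if.then block into the function entry and the shared exit, i.e. the prologue is no longer shrink-wrapped past the fast path. A rough C++ analogue of the control flow under test (illustrative; crng_reseed_like is a hypothetical stand-in for the external @crng_reseed):

extern "C" void crng_reseed_like(); // stand-in for @crng_reseed

// Illustrative shape of _extract_crng_crng: the only call sits on a cold
// path. With shrink-wrapping, the link-register spill lives inside the if;
// after this revert it is emitted in the whole function's prologue and
// epilogue, so the fast path pays for it too.
int extract_crng_like(long Crng, long Uptime) {
  if (Crng < 0 || Uptime < -1272) // cold condition
    crng_reseed_like();           // clobbers x30, forcing the spill
  return 0;                       // fast path
}
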
@@ -778,7 +778,7 @@ define i32 @commute_subop0_zext(i16 %x, i32 %y, i32 %z) {
define i8 @commute_subop0_anyext(i16 %a, i16 %b, i32 %c) {
; CHECK-LABEL: commute_subop0_anyext:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #111
; CHECK-NEXT: mov w8, #111 // =0x6f
; CHECK-NEXT: sub w9, w2, w1
; CHECK-NEXT: madd w8, w0, w8, w9
; CHECK-NEXT: lsl w8, w8, #3