Skip to content

Commit 4e2b4f9

Browse files
committed
[ShrinkWrap] Use underlying object to rule out stack access.
Allow shrink-wrapping past memory accesses that only access globals or function arguments.
This patch uses getUnderlyingObject to try to identify the object accessed by a given memory operand. If it is a global or an argument, the access does not touch the stack of the current function and should not block shrink wrapping.
Note that the caller's stack may get accessed when passing an argument via the stack, but not the stack of the current function.
This addresses part of the TODO from D63152.
Reviewed By: thegameg
Differential Revision: https://reviews.llvm.org/D149668
1 parent 583d492 commit 4e2b4f9

23 files changed

+349
-333
lines changed

llvm/lib/CodeGen/ShrinkWrap.cpp

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@
5353
#include "llvm/ADT/SmallVector.h"
5454
#include "llvm/ADT/Statistic.h"
5555
#include "llvm/Analysis/CFG.h"
56+
#include "llvm/Analysis/ValueTracking.h"
5657
#include "llvm/CodeGen/MachineBasicBlock.h"
5758
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
5859
#include "llvm/CodeGen/MachineDominators.h"
@@ -259,13 +260,30 @@ INITIALIZE_PASS_END(ShrinkWrap, DEBUG_TYPE, "Shrink Wrap Pass", false, false)
259260

260261
bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI,
261262
RegScavenger *RS) const {
263+
/// Check if \p Op is known to access an address not on the function's stack.
264+
/// At the moment, accesses where the underlying object is a global or a
265+
/// function argument are considered non-stack accesses. Note that the
266+
/// caller's stack may get accessed when passing an argument via the stack,
267+
/// but not the stack of the current function.
268+
///
269+
auto IsKnownNonStackPtr = [](MachineMemOperand *Op) {
270+
if (Op->getValue()) {
271+
const Value *UO = getUnderlyingObject(Op->getValue());
272+
if (!UO)
273+
return false;
274+
if (auto *Arg = dyn_cast<Argument>(UO))
275+
return !Arg->hasPassPointeeByValueCopyAttr();
276+
return isa<GlobalValue>(UO);
277+
}
278+
return false;
279+
};
262280
// This prevents premature stack popping when there is an indirect stack
263-
// access. It is overly aggressive for the moment.
264-
// TODO: - Obvious non-stack loads and store, such as global values,
265-
// are known to not access the stack.
281+
// access. It is overly aggressive for the moment.
282+
// TODO:
266283
// - Further, data dependency and alias analysis can validate
267284
// that load and stores never derive from the stack pointer.
268-
if (MI.mayLoadOrStore())
285+
if (MI.mayLoadOrStore() && (MI.isCall() || MI.hasUnmodeledSideEffects() ||
286+
!all_of(MI.memoperands(), IsKnownNonStackPtr)))
269287
return true;
270288

271289
if (MI.getOpcode() == FrameSetupOpcode ||

llvm/test/CodeGen/AArch64/addsub.ll

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -651,9 +651,6 @@ declare dso_local i32 @crng_reseed(...) local_unnamed_addr
651651
define dso_local i32 @_extract_crng_crng() {
652652
; CHECK-LABEL: _extract_crng_crng:
653653
; CHECK: // %bb.0: // %entry
654-
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
655-
; CHECK-NEXT: .cfi_def_cfa_offset 16
656-
; CHECK-NEXT: .cfi_offset w30, -16
657654
; CHECK-NEXT: adrp x8, _extract_crng_crng
658655
; CHECK-NEXT: add x8, x8, :lo12:_extract_crng_crng
659656
; CHECK-NEXT: tbnz x8, #63, .LBB36_2
@@ -665,15 +662,20 @@ define dso_local i32 @_extract_crng_crng() {
665662
; CHECK-NEXT: cmn x8, #1272
666663
; CHECK-NEXT: b.pl .LBB36_3
667664
; CHECK-NEXT: .LBB36_2: // %if.then
665+
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
666+
; CHECK-NEXT: .cfi_def_cfa_offset 16
667+
; CHECK-NEXT: .cfi_offset w30, -16
668668
; CHECK-NEXT: adrp x8, primary_crng
669669
; CHECK-NEXT: adrp x9, input_pool
670670
; CHECK-NEXT: add x9, x9, :lo12:input_pool
671671
; CHECK-NEXT: ldr w8, [x8, :lo12:primary_crng]
672672
; CHECK-NEXT: cmp w8, #0
673673
; CHECK-NEXT: csel x0, xzr, x9, eq
674674
; CHECK-NEXT: bl crng_reseed
675-
; CHECK-NEXT: .LBB36_3: // %if.end
676675
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
676+
; CHECK-NEXT: .LBB36_3: // %if.end
677+
; CHECK-NEXT: .cfi_def_cfa wsp, 0
678+
; CHECK-NEXT: .cfi_same_value w30
677679
; CHECK-NEXT: ret
678680
entry:
679681
br i1 icmp slt (ptr @_extract_crng_crng, ptr null), label %if.then, label %lor.lhs.false

llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1004,25 +1004,28 @@ end:
10041004
define i32 @stack_realign(i32 %a, i32 %b, ptr %ptr1, ptr %ptr2) {
10051005
; ENABLE-LABEL: stack_realign:
10061006
; ENABLE: ; %bb.0:
1007+
; ENABLE-NEXT: lsl w8, w0, w1
1008+
; ENABLE-NEXT: lsl w9, w1, w0
1009+
; ENABLE-NEXT: cmp w0, w1
1010+
; ENABLE-NEXT: b.ge LBB13_2
1011+
; ENABLE-NEXT: ; %bb.1: ; %true
10071012
; ENABLE-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
10081013
; ENABLE-NEXT: .cfi_def_cfa_offset 16
10091014
; ENABLE-NEXT: mov x29, sp
10101015
; ENABLE-NEXT: .cfi_def_cfa w29, 16
10111016
; ENABLE-NEXT: .cfi_offset w30, -8
10121017
; ENABLE-NEXT: .cfi_offset w29, -16
1013-
; ENABLE-NEXT: sub x9, sp, #16
1014-
; ENABLE-NEXT: and sp, x9, #0xffffffffffffffe0
1015-
; ENABLE-NEXT: lsl w8, w0, w1
1016-
; ENABLE-NEXT: lsl w9, w1, w0
1017-
; ENABLE-NEXT: cmp w0, w1
1018-
; ENABLE-NEXT: b.ge LBB13_2
1019-
; ENABLE-NEXT: ; %bb.1: ; %true
1018+
; ENABLE-NEXT: sub x1, sp, #16
1019+
; ENABLE-NEXT: and sp, x1, #0xffffffffffffffe0
10201020
; ENABLE-NEXT: str w0, [sp]
1021+
; ENABLE-NEXT: mov sp, x29
1022+
; ENABLE-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
10211023
; ENABLE-NEXT: LBB13_2: ; %false
1024+
; ENABLE-NEXT: .cfi_def_cfa wsp, 0
1025+
; ENABLE-NEXT: .cfi_same_value w30
1026+
; ENABLE-NEXT: .cfi_same_value w29
10221027
; ENABLE-NEXT: str w8, [x2]
10231028
; ENABLE-NEXT: str w9, [x3]
1024-
; ENABLE-NEXT: mov sp, x29
1025-
; ENABLE-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
10261029
; ENABLE-NEXT: ret
10271030
;
10281031
; DISABLE-LABEL: stack_realign:

llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -658,28 +658,28 @@ return: ; preds = %if.end, %land.lhs.t
658658
define i32 @fcmpri(i32 %argc, ptr nocapture readonly %argv) #0 {
659659
; CHECK-LABEL: fcmpri:
660660
; CHECK: // %bb.0: // %entry
661-
; CHECK-NEXT: str d8, [sp, #-32]! // 8-byte Folded Spill
662-
; CHECK-NEXT: .cfi_def_cfa_offset 32
663-
; CHECK-NEXT: stp x30, x19, [sp, #16] // 16-byte Folded Spill
664-
; CHECK-NEXT: .cfi_offset w19, -8
665-
; CHECK-NEXT: .cfi_offset w30, -16
666-
; CHECK-NEXT: .cfi_offset b8, -32
667661
; CHECK-NEXT: cmp w0, #2
668662
; CHECK-NEXT: b.lt .LBB9_3
669663
; CHECK-NEXT: // %bb.1: // %land.lhs.true
670664
; CHECK-NEXT: ldr x8, [x1, #8]
671665
; CHECK-NEXT: cbz x8, .LBB9_3
672666
; CHECK-NEXT: // %bb.2:
673-
; CHECK-NEXT: mov w0, #3
674-
; CHECK-NEXT: b .LBB9_4
667+
; CHECK-NEXT: mov w0, #3 // =0x3
668+
; CHECK-NEXT: ret
675669
; CHECK-NEXT: .LBB9_3: // %if.end
676-
; CHECK-NEXT: mov w0, #1
670+
; CHECK-NEXT: str d8, [sp, #-32]! // 8-byte Folded Spill
671+
; CHECK-NEXT: .cfi_def_cfa_offset 32
672+
; CHECK-NEXT: stp x30, x19, [sp, #16] // 16-byte Folded Spill
673+
; CHECK-NEXT: .cfi_offset w19, -8
674+
; CHECK-NEXT: .cfi_offset w30, -16
675+
; CHECK-NEXT: .cfi_offset b8, -32
676+
; CHECK-NEXT: mov w0, #1 // =0x1
677677
; CHECK-NEXT: bl zoo
678678
; CHECK-NEXT: mov w19, w0
679-
; CHECK-NEXT: mov w0, #-1
679+
; CHECK-NEXT: mov w0, #-1 // =0xffffffff
680680
; CHECK-NEXT: bl yoo
681681
; CHECK-NEXT: cmp w19, #0
682-
; CHECK-NEXT: mov w1, #2
682+
; CHECK-NEXT: mov w1, #2 // =0x2
683683
; CHECK-NEXT: cinc w0, w19, gt
684684
; CHECK-NEXT: fmov d8, d0
685685
; CHECK-NEXT: bl xoo
@@ -689,9 +689,8 @@ define i32 @fcmpri(i32 %argc, ptr nocapture readonly %argv) #0 {
689689
; CHECK-NEXT: fadd d0, d8, d0
690690
; CHECK-NEXT: fcsel d0, d8, d0, gt
691691
; CHECK-NEXT: bl woo
692-
; CHECK-NEXT: mov w0, #4
693-
; CHECK-NEXT: .LBB9_4: // %return
694692
; CHECK-NEXT: ldp x30, x19, [sp, #16] // 16-byte Folded Reload
693+
; CHECK-NEXT: mov w0, #4 // =0x4
695694
; CHECK-NEXT: ldr d8, [sp], #32 // 8-byte Folded Reload
696695
; CHECK-NEXT: .cfi_def_cfa_offset 0
697696
; CHECK-NEXT: .cfi_restore w19

llvm/test/CodeGen/AArch64/csr-split.ll

Lines changed: 11 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -9,23 +9,17 @@
99
define dso_local signext i32 @test1(ptr %b) local_unnamed_addr uwtable {
1010
; CHECK-LABEL: test1:
1111
; CHECK: // %bb.0: // %entry
12-
; CHECK-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
13-
; CHECK-NEXT: .cfi_def_cfa_offset 16
14-
; CHECK-NEXT: .cfi_offset w19, -8
15-
; CHECK-NEXT: .cfi_offset w30, -16
16-
; CHECK-NEXT: .cfi_remember_state
1712
; CHECK-NEXT: adrp x8, a
1813
; CHECK-NEXT: ldrsw x8, [x8, :lo12:a]
1914
; CHECK-NEXT: cmp x8, x0
2015
; CHECK-NEXT: b.eq .LBB0_2
2116
; CHECK-NEXT: // %bb.1: // %if.end
22-
; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
23-
; CHECK-NEXT: .cfi_def_cfa_offset 0
24-
; CHECK-NEXT: .cfi_restore w19
25-
; CHECK-NEXT: .cfi_restore w30
2617
; CHECK-NEXT: ret
2718
; CHECK-NEXT: .LBB0_2: // %if.then
28-
; CHECK-NEXT: .cfi_restore_state
19+
; CHECK-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
20+
; CHECK-NEXT: .cfi_def_cfa_offset 16
21+
; CHECK-NEXT: .cfi_offset w19, -8
22+
; CHECK-NEXT: .cfi_offset w30, -16
2923
; CHECK-NEXT: mov x19, x0
3024
; CHECK-NEXT: bl callVoid
3125
; CHECK-NEXT: mov x0, x19
@@ -37,31 +31,22 @@ define dso_local signext i32 @test1(ptr %b) local_unnamed_addr uwtable {
3731
;
3832
; CHECK-APPLE-LABEL: test1:
3933
; CHECK-APPLE: ; %bb.0: ; %entry
40-
; CHECK-APPLE-NEXT: stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill
41-
; CHECK-APPLE-NEXT: .cfi_def_cfa_offset 32
42-
; CHECK-APPLE-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
43-
; CHECK-APPLE-NEXT: .cfi_offset w30, -8
44-
; CHECK-APPLE-NEXT: .cfi_offset w29, -16
45-
; CHECK-APPLE-NEXT: .cfi_offset w19, -24
46-
; CHECK-APPLE-NEXT: .cfi_offset w20, -32
47-
; CHECK-APPLE-NEXT: .cfi_remember_state
4834
; CHECK-APPLE-NEXT: Lloh0:
4935
; CHECK-APPLE-NEXT: adrp x8, _a@PAGE
5036
; CHECK-APPLE-NEXT: Lloh1:
5137
; CHECK-APPLE-NEXT: ldrsw x8, [x8, _a@PAGEOFF]
5238
; CHECK-APPLE-NEXT: cmp x8, x0
5339
; CHECK-APPLE-NEXT: b.eq LBB0_2
5440
; CHECK-APPLE-NEXT: ; %bb.1: ; %if.end
55-
; CHECK-APPLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
56-
; CHECK-APPLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
57-
; CHECK-APPLE-NEXT: .cfi_def_cfa_offset 0
58-
; CHECK-APPLE-NEXT: .cfi_restore w30
59-
; CHECK-APPLE-NEXT: .cfi_restore w29
60-
; CHECK-APPLE-NEXT: .cfi_restore w19
61-
; CHECK-APPLE-NEXT: .cfi_restore w20
6241
; CHECK-APPLE-NEXT: ret
6342
; CHECK-APPLE-NEXT: LBB0_2: ; %if.then
64-
; CHECK-APPLE-NEXT: .cfi_restore_state
43+
; CHECK-APPLE-NEXT: stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill
44+
; CHECK-APPLE-NEXT: .cfi_def_cfa_offset 32
45+
; CHECK-APPLE-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
46+
; CHECK-APPLE-NEXT: .cfi_offset w30, -8
47+
; CHECK-APPLE-NEXT: .cfi_offset w29, -16
48+
; CHECK-APPLE-NEXT: .cfi_offset w19, -24
49+
; CHECK-APPLE-NEXT: .cfi_offset w20, -32
6550
; CHECK-APPLE-NEXT: mov x19, x0
6651
; CHECK-APPLE-NEXT: bl _callVoid
6752
; CHECK-APPLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload

llvm/test/CodeGen/AArch64/shrink-wrap-byval-inalloca-preallocated.ll

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,6 @@ declare void @fn(ptr, ptr)
1212
define void @test_regular_pointers(ptr %a, ptr %b) {
1313
; CHECK-LABEL: test_regular_pointers:
1414
; CHECK: ; %bb.0: ; %entry
15-
; CHECK-NEXT: stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill
16-
; CHECK-NEXT: .cfi_def_cfa_offset 32
17-
; CHECK-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
18-
; CHECK-NEXT: .cfi_offset w30, -8
19-
; CHECK-NEXT: .cfi_offset w29, -16
20-
; CHECK-NEXT: .cfi_offset w19, -24
21-
; CHECK-NEXT: .cfi_offset w20, -32
2215
; CHECK-NEXT: ldr d0, [x0]
2316
; CHECK-NEXT: mov x8, #1 ; =0x1
2417
; CHECK-NEXT: ldr d1, [x1, #8]
@@ -30,12 +23,24 @@ define void @test_regular_pointers(ptr %a, ptr %b) {
3023
; CHECK-NEXT: b.mi LBB0_2
3124
; CHECK-NEXT: b.gt LBB0_2
3225
; CHECK-NEXT: ; %bb.1: ; %then
26+
; CHECK-NEXT: stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill
27+
; CHECK-NEXT: .cfi_def_cfa_offset 32
28+
; CHECK-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
29+
; CHECK-NEXT: .cfi_offset w30, -8
30+
; CHECK-NEXT: .cfi_offset w29, -16
31+
; CHECK-NEXT: .cfi_offset w19, -24
32+
; CHECK-NEXT: .cfi_offset w20, -32
3333
; CHECK-NEXT: mov x19, x1
3434
; CHECK-NEXT: bl _fn
35-
; CHECK-NEXT: str xzr, [x19]
36-
; CHECK-NEXT: LBB0_2: ; %exit
3735
; CHECK-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
36+
; CHECK-NEXT: str xzr, [x19]
3837
; CHECK-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
38+
; CHECK-NEXT: LBB0_2: ; %exit
39+
; CHECK-NEXT: .cfi_def_cfa wsp, 0
40+
; CHECK-NEXT: .cfi_same_value w30
41+
; CHECK-NEXT: .cfi_same_value w29
42+
; CHECK-NEXT: .cfi_same_value w19
43+
; CHECK-NEXT: .cfi_same_value w20
3944
; CHECK-NEXT: ret
4045
entry:
4146
%l.a = load double, ptr %a, align 8

llvm/test/CodeGen/AArch64/taildup-cfi.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ if.then: ; preds = %entry
3232
store i32 0, ptr @f, align 4, !tbaa !2
3333
br label %if.end
3434

35-
; DARWIN: Merging into block
35+
; DARWIN-NOT: Merging into block
3636
; LINUX: Merging into block
3737

3838
if.end: ; preds = %entry.if.end_crit_edge, %if.then

0 commit comments

Comments
 (0)