Skip to content

Commit 7af7064

Browse files
Revert "[AArch64] Remove unused ReverseCSRRestoreSeq option. (#82326)"
Patch 3f0404a is breaking some debug builds, so we cannot use the reverse here. This reverts commit 493f101.
1 parent 8302cef commit 7af7064

File tree

2 files changed

+146
-21
lines changed

2 files changed

+146
-21
lines changed

llvm/lib/Target/AArch64/AArch64FrameLowering.cpp

Lines changed: 45 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,11 @@ static cl::opt<bool> EnableRedZone("aarch64-redzone",
239239
cl::desc("enable use of redzone on AArch64"),
240240
cl::init(false), cl::Hidden);
241241

242+
static cl::opt<bool>
243+
ReverseCSRRestoreSeq("reverse-csr-restore-seq",
244+
cl::desc("reverse the CSR restore sequence"),
245+
cl::init(false), cl::Hidden);
246+
242247
static cl::opt<bool> StackTaggingMergeSetTag(
243248
"stack-tagging-merge-settag",
244249
cl::desc("merge settag instruction in function epilog"), cl::init(true),
@@ -302,6 +307,8 @@ bool AArch64FrameLowering::homogeneousPrologEpilog(
302307
return false;
303308
if (!EnableHomogeneousPrologEpilog)
304309
return false;
310+
if (ReverseCSRRestoreSeq)
311+
return false;
305312
if (EnableRedZone)
306313
return false;
307314

@@ -3104,27 +3111,7 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
31043111

31053112
computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs, hasFP(MF));
31063113

3107-
if (homogeneousPrologEpilog(MF, &MBB)) {
3108-
auto MIB = BuildMI(MBB, MBBI, DL, TII.get(AArch64::HOM_Epilog))
3109-
.setMIFlag(MachineInstr::FrameDestroy);
3110-
for (auto &RPI : RegPairs) {
3111-
MIB.addReg(RPI.Reg1, RegState::Define);
3112-
MIB.addReg(RPI.Reg2, RegState::Define);
3113-
}
3114-
return true;
3115-
}
3116-
3117-
// For performance reasons restore SVE register in increasing order
3118-
auto IsPPR = [](const RegPairInfo &c) { return c.Type == RegPairInfo::PPR; };
3119-
auto PPRBegin = std::find_if(RegPairs.begin(), RegPairs.end(), IsPPR);
3120-
auto PPREnd = std::find_if(RegPairs.rbegin(), RegPairs.rend(), IsPPR);
3121-
std::reverse(PPRBegin, PPREnd.base());
3122-
auto IsZPR = [](const RegPairInfo &c) { return c.Type == RegPairInfo::ZPR; };
3123-
auto ZPRBegin = std::find_if(RegPairs.begin(), RegPairs.end(), IsZPR);
3124-
auto ZPREnd = std::find_if(RegPairs.rbegin(), RegPairs.rend(), IsZPR);
3125-
std::reverse(ZPRBegin, ZPREnd.base());
3126-
3127-
for (const RegPairInfo &RPI : RegPairs) {
3114+
auto EmitMI = [&](const RegPairInfo &RPI) -> MachineBasicBlock::iterator {
31283115
unsigned Reg1 = RPI.Reg1;
31293116
unsigned Reg2 = RPI.Reg2;
31303117

@@ -3198,6 +3185,43 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
31983185
MachineMemOperand::MOLoad, Size, Alignment));
31993186
if (NeedsWinCFI)
32003187
InsertSEH(MIB, TII, MachineInstr::FrameDestroy);
3188+
3189+
return MIB->getIterator();
3190+
};
3191+
3192+
if (homogeneousPrologEpilog(MF, &MBB)) {
3193+
auto MIB = BuildMI(MBB, MBBI, DL, TII.get(AArch64::HOM_Epilog))
3194+
.setMIFlag(MachineInstr::FrameDestroy);
3195+
for (auto &RPI : RegPairs) {
3196+
MIB.addReg(RPI.Reg1, RegState::Define);
3197+
MIB.addReg(RPI.Reg2, RegState::Define);
3198+
}
3199+
return true;
3200+
}
3201+
3202+
// For performance reasons restore SVE register in increasing order
3203+
auto IsPPR = [](const RegPairInfo &c) { return c.Type == RegPairInfo::PPR; };
3204+
auto PPRBegin = std::find_if(RegPairs.begin(), RegPairs.end(), IsPPR);
3205+
auto PPREnd = std::find_if(RegPairs.rbegin(), RegPairs.rend(), IsPPR);
3206+
std::reverse(PPRBegin, PPREnd.base());
3207+
auto IsZPR = [](const RegPairInfo &c) { return c.Type == RegPairInfo::ZPR; };
3208+
auto ZPRBegin = std::find_if(RegPairs.begin(), RegPairs.end(), IsZPR);
3209+
auto ZPREnd = std::find_if(RegPairs.rbegin(), RegPairs.rend(), IsZPR);
3210+
std::reverse(ZPRBegin, ZPREnd.base());
3211+
3212+
if (ReverseCSRRestoreSeq) {
3213+
MachineBasicBlock::iterator First = MBB.end();
3214+
for (const RegPairInfo &RPI : reverse(RegPairs)) {
3215+
MachineBasicBlock::iterator It = EmitMI(RPI);
3216+
if (First == MBB.end())
3217+
First = It;
3218+
}
3219+
if (First != MBB.end())
3220+
MBB.splice(MBBI, &MBB, First);
3221+
} else {
3222+
for (const RegPairInfo &RPI : RegPairs) {
3223+
(void)EmitMI(RPI);
3224+
}
32013225
}
32023226

32033227
return true;
Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
# RUN: llc -run-pass=prologepilog -reverse-csr-restore-seq -o - -mtriple=aarch64-- %s | FileCheck %s --check-prefixes=CHECK
2+
# RUN: llc -start-before=prologepilog -stop-after=aarch64-ldst-opt -reverse-csr-restore-seq -o - -mtriple=aarch64-- %s | FileCheck %s --check-prefixes=CHECK
3+
#
4+
--- |
5+
6+
define void @foo() nounwind { entry: unreachable }
7+
8+
define void @bar() nounwind { entry: unreachable }
9+
10+
define void @baz() nounwind { entry: unreachable }
11+
12+
...
13+
---
14+
name: foo
15+
# CHECK-LABEL: name: foo
16+
tracksRegLiveness: true
17+
body: |
18+
bb.0:
19+
$x19 = IMPLICIT_DEF
20+
$x20 = IMPLICIT_DEF
21+
$x21 = IMPLICIT_DEF
22+
$x22 = IMPLICIT_DEF
23+
$x23 = IMPLICIT_DEF
24+
$x24 = IMPLICIT_DEF
25+
$x25 = IMPLICIT_DEF
26+
$x26 = IMPLICIT_DEF
27+
28+
; The local stack size is 0, so the last ldp in the sequence will also
29+
; restore the stack.
30+
; CHECK: $x24, $x23 = frame-destroy LDPXi $sp, 2
31+
; CHECK-NEXT: $x22, $x21 = frame-destroy LDPXi $sp, 4
32+
; CHECK-NEXT: $x20, $x19 = frame-destroy LDPXi $sp, 6
33+
34+
; The ldp and the stack increment get merged even before
35+
; the load-store optimizer.
36+
; CHECK-NEXT: early-clobber $sp, $x26, $x25 = frame-destroy LDPXpost $sp, 8
37+
38+
RET_ReallyLR
39+
...
40+
---
41+
name: bar
42+
# CHECK-LABEL: name: bar
43+
tracksRegLiveness: true
44+
stack:
45+
- { id : 0, size: 8, alignment: 4,
46+
stack-id: default, callee-saved-register: '', callee-saved-restored: true,
47+
local-offset: -4, debug-info-variable: '', debug-info-expression: '',
48+
debug-info-location: '' }
49+
50+
body: |
51+
bb.0:
52+
$x19 = IMPLICIT_DEF
53+
$x20 = IMPLICIT_DEF
54+
$x21 = IMPLICIT_DEF
55+
$x22 = IMPLICIT_DEF
56+
$x23 = IMPLICIT_DEF
57+
$x24 = IMPLICIT_DEF
58+
$x25 = IMPLICIT_DEF
59+
$x26 = IMPLICIT_DEF
60+
61+
; The local stack size is not 0, and we can combine the CSR stack size with
62+
; the local stack size. This results in rewriting the offsets for all the
63+
; save/restores and prevents us from merging the stack adjustment and the last pop.
64+
; In this case, there is no point in moving the first CSR pair to the end.
65+
; We do it anyway, as it's a small price to pay for the resulting
66+
; simplification in the epilogue emission code.
67+
; CHECK: $x24, $x23 = frame-destroy LDPXi $sp, 4
68+
; CHECK-NEXT: $x22, $x21 = frame-destroy LDPXi $sp, 6
69+
; CHECK-NEXT: $x20, $x19 = frame-destroy LDPXi $sp, 8
70+
; CHECK-NEXT: $x26, $x25 = frame-destroy LDPXi $sp, 2
71+
; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 80, 0
72+
RET_ReallyLR
73+
...
74+
---
75+
# Check that the load from offset 0 is moved to the end even when hasFP is
76+
# false.
77+
name: baz
78+
# CHECK-LABEL: name: baz
79+
alignment: 4
80+
tracksRegLiveness: true
81+
frameInfo:
82+
adjustsStack: true
83+
hasCalls: true
84+
body: |
85+
bb.0:
86+
successors: %bb.1
87+
88+
$x0 = IMPLICIT_DEF
89+
$x20 = IMPLICIT_DEF
90+
$x21 = IMPLICIT_DEF
91+
92+
ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
93+
BL @foo, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp, implicit-def $x0
94+
ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
95+
B %bb.1
96+
97+
bb.1:
98+
; CHECK: $x21, $x20 = frame-destroy LDPXi $sp, 2
99+
; CHECK-NEXT: early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 32
100+
RET_ReallyLR
101+
...

0 commit comments

Comments
 (0)