Skip to content

Commit 061c7db

Browse files
[AArch64][SME] Save VG for unwind info when changing streaming-mode
If a function requires any streaming-mode change, the value of the vector granule (VG) must be stored to the stack, and the unwind info must also describe the save of VG to that location. This patch adds VG to the list of callee-saved registers and increases the callee-saved stack size in determineCalleeSaves when the function requires streaming-mode changes. A new VG type is added to RegPairInfo; it is also used to skip restoring the register in the restore block. See https://github.com/ARM-software/abi-aa/blob/main/aadwarf64/aadwarf64.rst
1 parent 3a2c70b commit 061c7db

9 files changed

+1109
-271
lines changed

llvm/lib/Target/AArch64/AArch64FrameLowering.cpp

Lines changed: 57 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -321,7 +321,7 @@ bool AArch64FrameLowering::homogeneousPrologEpilog(
321321
return false;
322322

323323
auto *AFI = MF.getInfo<AArch64FunctionInfo>();
324-
if (AFI->hasSwiftAsyncContext())
324+
if (AFI->hasSwiftAsyncContext() || AFI->hasStreamingModeChanges())
325325
return false;
326326

327327
// If there are an odd number of GPRs before LR and FP in the CSRs list,
@@ -691,6 +691,9 @@ static void emitCalleeSavedRestores(MachineBasicBlock &MBB,
691691
!static_cast<const AArch64RegisterInfo &>(TRI).regNeedsCFI(Reg, Reg))
692692
continue;
693693

694+
if (!Info.isRestored())
695+
continue;
696+
694697
unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createRestore(
695698
nullptr, TRI.getDwarfRegNum(Info.getReg(), true)));
696699
BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
@@ -1344,6 +1347,7 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
13441347
MachineInstr::MIFlag FrameFlag = MachineInstr::FrameSetup,
13451348
int CFAOffset = 0) {
13461349
unsigned NewOpc;
1350+
13471351
switch (MBBI->getOpcode()) {
13481352
default:
13491353
llvm_unreachable("Unexpected callee-save save/restore opcode!");
@@ -1651,6 +1655,13 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
16511655
LiveRegs.removeReg(AArch64::LR);
16521656
}
16531657

1658+
// If the function contains streaming mode changes, we expect the first
1659+
// instruction of MBB to be a CNTD. Move past this instruction if found.
1660+
if (AFI->hasStreamingModeChanges()) {
1661+
assert(MBBI->getOpcode() == AArch64::CNTD_XPiI && "Unexpected instruction");
1662+
MBBI = std::next(MBBI);
1663+
}
1664+
16541665
auto VerifyClobberOnExit = make_scope_exit([&]() {
16551666
if (NonFrameStart == MBB.end())
16561667
return;
@@ -2756,7 +2767,7 @@ struct RegPairInfo {
27562767
unsigned Reg2 = AArch64::NoRegister;
27572768
int FrameIdx;
27582769
int Offset;
2759-
enum RegType { GPR, FPR64, FPR128, PPR, ZPR } Type;
2770+
enum RegType { GPR, FPR64, FPR128, PPR, ZPR, VG } Type;
27602771

27612772
RegPairInfo() = default;
27622773

@@ -2768,6 +2779,7 @@ struct RegPairInfo {
27682779
return 2;
27692780
case GPR:
27702781
case FPR64:
2782+
case VG:
27712783
return 8;
27722784
case ZPR:
27732785
case FPR128:
@@ -2833,6 +2845,8 @@ static void computeCalleeSaveRegisterPairs(
28332845
RPI.Type = RegPairInfo::ZPR;
28342846
else if (AArch64::PPRRegClass.contains(RPI.Reg1))
28352847
RPI.Type = RegPairInfo::PPR;
2848+
else if (RPI.Reg1 == AArch64::VG)
2849+
RPI.Type = RegPairInfo::VG;
28362850
else
28372851
llvm_unreachable("Unsupported register class.");
28382852

@@ -2860,6 +2874,7 @@ static void computeCalleeSaveRegisterPairs(
28602874
break;
28612875
case RegPairInfo::PPR:
28622876
case RegPairInfo::ZPR:
2877+
case RegPairInfo::VG:
28632878
break;
28642879
}
28652880
}
@@ -3047,7 +3062,23 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
30473062
Size = 2;
30483063
Alignment = Align(2);
30493064
break;
3065+
case RegPairInfo::VG:
3066+
StrOpc = AArch64::STRXui;
3067+
Size = 8;
3068+
Alignment = Align(8);
3069+
break;
30503070
}
3071+
3072+
if (Reg1 == AArch64::VG) {
3073+
// Find an available register to store value of VG to.
3074+
Reg1 = findScratchNonCalleeSaveRegister(&MBB);
3075+
assert(Reg1 != AArch64::NoRegister);
3076+
3077+
BuildMI(MBB, MBB.begin(), DL, TII.get(AArch64::CNTD_XPiI), Reg1)
3078+
.addImm(31)
3079+
.addImm(1);
3080+
}
3081+
30513082
LLVM_DEBUG(dbgs() << "CSR spill: (" << printReg(Reg1, TRI);
30523083
if (RPI.isPaired()) dbgs() << ", " << printReg(Reg2, TRI);
30533084
dbgs() << ") -> fi#(" << RPI.FrameIdx;
@@ -3171,6 +3202,8 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
31713202
Size = 2;
31723203
Alignment = Align(2);
31733204
break;
3205+
case RegPairInfo::VG:
3206+
continue;
31743207
}
31753208
LLVM_DEBUG(dbgs() << "CSR restore: (" << printReg(Reg1, TRI);
31763209
if (RPI.isPaired()) dbgs() << ", " << printReg(Reg2, TRI);
@@ -3313,6 +3346,11 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
33133346
CSStackSize += RegSize;
33143347
}
33153348

3349+
// Increase the callee-saved stack size if the function has streaming mode
3350+
// changes, as we will need to spill the value of the VG register.
3351+
if (AFI->hasStreamingModeChanges())
3352+
CSStackSize += 8;
3353+
33163354
// Save number of saved regs, so we can easily update CSStackSize later.
33173355
unsigned NumSavedRegs = SavedRegs.count();
33183356

@@ -3449,6 +3487,23 @@ bool AArch64FrameLowering::assignCalleeSavedSpillSlots(
34493487
if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx;
34503488
}
34513489

3490+
// Insert VG into the list of CSRs, immediately before LR if saved.
3491+
if (AFI->hasStreamingModeChanges()) {
3492+
auto VGInfo = CalleeSavedInfo(AArch64::VG);
3493+
VGInfo.setRestored(false);
3494+
bool InsertBeforeLR = false;
3495+
3496+
for (unsigned I = 0; I < CSI.size(); I++)
3497+
if (CSI[I].getReg() == AArch64::LR) {
3498+
InsertBeforeLR = true;
3499+
CSI.insert(CSI.begin() + I, VGInfo);
3500+
break;
3501+
}
3502+
3503+
if (!InsertBeforeLR)
3504+
CSI.push_back(VGInfo);
3505+
}
3506+
34523507
for (auto &CS : CSI) {
34533508
Register Reg = CS.getReg();
34543509
const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg);

llvm/test/CodeGen/AArch64/sme-call-streaming-compatible-to-normal-fn-wihout-sme-attr.ll

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2-
; RUN: llc < %s | FileCheck %s
2+
; RUN: llc -mattr=+sve < %s | FileCheck %s
33

44
; Verify that the following code can be compiled without +sme, because if the
55
; call is not entered in streaming-SVE mode at runtime, the codepath leading
@@ -10,11 +10,13 @@ target triple = "aarch64"
1010
define void @streaming_compatible() #0 {
1111
; CHECK-LABEL: streaming_compatible:
1212
; CHECK: // %bb.0:
13-
; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
13+
; CHECK-NEXT: cntd x9
14+
; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
1415
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
1516
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
1617
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
17-
; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
18+
; CHECK-NEXT: stp x30, x9, [sp, #64] // 16-byte Folded Spill
19+
; CHECK-NEXT: str x19, [sp, #80] // 8-byte Folded Spill
1820
; CHECK-NEXT: bl __arm_sme_state
1921
; CHECK-NEXT: and x19, x0, #0x1
2022
; CHECK-NEXT: tbz w19, #0, .LBB0_2
@@ -26,11 +28,12 @@ define void @streaming_compatible() #0 {
2628
; CHECK-NEXT: // %bb.3:
2729
; CHECK-NEXT: smstart sm
2830
; CHECK-NEXT: .LBB0_4:
29-
; CHECK-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload
3031
; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
32+
; CHECK-NEXT: ldr x19, [sp, #80] // 8-byte Folded Reload
3133
; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
34+
; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
3235
; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
33-
; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
36+
; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
3437
; CHECK-NEXT: ret
3538
call void @non_streaming()
3639
ret void

llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll

Lines changed: 24 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,14 @@ declare double @normal_callee(double)
1111
define double @nonstreaming_caller_streaming_callee(double %x) nounwind noinline optnone {
1212
; CHECK-FISEL-LABEL: nonstreaming_caller_streaming_callee:
1313
; CHECK-FISEL: // %bb.0: // %entry
14+
; CHECK-FISEL-NEXT: cntd x9
1415
; CHECK-FISEL-NEXT: sub sp, sp, #96
1516
; CHECK-FISEL-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
1617
; CHECK-FISEL-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
1718
; CHECK-FISEL-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
1819
; CHECK-FISEL-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
1920
; CHECK-FISEL-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
21+
; CHECK-FISEL-NEXT: str x9, [sp, #88] // 8-byte Folded Spill
2022
; CHECK-FISEL-NEXT: str d0, [sp] // 8-byte Folded Spill
2123
; CHECK-FISEL-NEXT: smstart sm
2224
; CHECK-FISEL-NEXT: ldr d0, [sp] // 8-byte Folded Reload
@@ -37,12 +39,14 @@ define double @nonstreaming_caller_streaming_callee(double %x) nounwind noinline
3739
;
3840
; CHECK-GISEL-LABEL: nonstreaming_caller_streaming_callee:
3941
; CHECK-GISEL: // %bb.0: // %entry
42+
; CHECK-GISEL-NEXT: cntd x9
4043
; CHECK-GISEL-NEXT: sub sp, sp, #96
4144
; CHECK-GISEL-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
4245
; CHECK-GISEL-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
4346
; CHECK-GISEL-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
4447
; CHECK-GISEL-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
4548
; CHECK-GISEL-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
49+
; CHECK-GISEL-NEXT: str x9, [sp, #88] // 8-byte Folded Spill
4650
; CHECK-GISEL-NEXT: str d0, [sp] // 8-byte Folded Spill
4751
; CHECK-GISEL-NEXT: smstart sm
4852
; CHECK-GISEL-NEXT: ldr d0, [sp] // 8-byte Folded Reload
@@ -70,12 +74,14 @@ entry:
7074
define double @streaming_caller_nonstreaming_callee(double %x) nounwind noinline optnone "aarch64_pstate_sm_enabled" {
7175
; CHECK-COMMON-LABEL: streaming_caller_nonstreaming_callee:
7276
; CHECK-COMMON: // %bb.0: // %entry
77+
; CHECK-COMMON-NEXT: cntd x9
7378
; CHECK-COMMON-NEXT: sub sp, sp, #96
7479
; CHECK-COMMON-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
7580
; CHECK-COMMON-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
7681
; CHECK-COMMON-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
7782
; CHECK-COMMON-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
7883
; CHECK-COMMON-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
84+
; CHECK-COMMON-NEXT: str x9, [sp, #88] // 8-byte Folded Spill
7985
; CHECK-COMMON-NEXT: str d0, [sp] // 8-byte Folded Spill
8086
; CHECK-COMMON-NEXT: smstop sm
8187
; CHECK-COMMON-NEXT: ldr d0, [sp] // 8-byte Folded Reload
@@ -102,12 +108,14 @@ entry:
102108
define double @locally_streaming_caller_normal_callee(double %x) nounwind noinline optnone "aarch64_pstate_sm_body" {
103109
; CHECK-COMMON-LABEL: locally_streaming_caller_normal_callee:
104110
; CHECK-COMMON: // %bb.0:
111+
; CHECK-COMMON-NEXT: cntd x9
105112
; CHECK-COMMON-NEXT: sub sp, sp, #112
106113
; CHECK-COMMON-NEXT: stp d15, d14, [sp, #32] // 16-byte Folded Spill
107114
; CHECK-COMMON-NEXT: stp d13, d12, [sp, #48] // 16-byte Folded Spill
108115
; CHECK-COMMON-NEXT: stp d11, d10, [sp, #64] // 16-byte Folded Spill
109116
; CHECK-COMMON-NEXT: stp d9, d8, [sp, #80] // 16-byte Folded Spill
110117
; CHECK-COMMON-NEXT: str x30, [sp, #96] // 8-byte Folded Spill
118+
; CHECK-COMMON-NEXT: str x9, [sp, #104] // 8-byte Folded Spill
111119
; CHECK-COMMON-NEXT: str d0, [sp, #24] // 8-byte Folded Spill
112120
; CHECK-COMMON-NEXT: smstart sm
113121
; CHECK-COMMON-NEXT: ldr d0, [sp, #24] // 8-byte Folded Reload
@@ -166,11 +174,13 @@ define double @normal_caller_to_locally_streaming_callee(double %x) nounwind noi
166174
define void @locally_streaming_caller_streaming_callee_ptr(ptr %p) nounwind noinline optnone "aarch64_pstate_sm_body" {
167175
; CHECK-COMMON-LABEL: locally_streaming_caller_streaming_callee_ptr:
168176
; CHECK-COMMON: // %bb.0:
177+
; CHECK-COMMON-NEXT: cntd x9
169178
; CHECK-COMMON-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
170179
; CHECK-COMMON-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
171180
; CHECK-COMMON-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
172181
; CHECK-COMMON-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
173182
; CHECK-COMMON-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
183+
; CHECK-COMMON-NEXT: str x9, [sp, #72] // 8-byte Folded Spill
174184
; CHECK-COMMON-NEXT: smstart sm
175185
; CHECK-COMMON-NEXT: blr x0
176186
; CHECK-COMMON-NEXT: smstop sm
@@ -187,11 +197,13 @@ define void @locally_streaming_caller_streaming_callee_ptr(ptr %p) nounwind noin
187197
define void @normal_call_to_streaming_callee_ptr(ptr %p) nounwind noinline optnone {
188198
; CHECK-COMMON-LABEL: normal_call_to_streaming_callee_ptr:
189199
; CHECK-COMMON: // %bb.0:
200+
; CHECK-COMMON-NEXT: cntd x9
190201
; CHECK-COMMON-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
191202
; CHECK-COMMON-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
192203
; CHECK-COMMON-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
193204
; CHECK-COMMON-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
194205
; CHECK-COMMON-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
206+
; CHECK-COMMON-NEXT: str x9, [sp, #72] // 8-byte Folded Spill
195207
; CHECK-COMMON-NEXT: smstart sm
196208
; CHECK-COMMON-NEXT: blr x0
197209
; CHECK-COMMON-NEXT: smstop sm
@@ -325,12 +337,13 @@ define fp128 @f128_call_za(fp128 %a, fp128 %b) "aarch64_inout_za" nounwind {
325337
define fp128 @f128_call_sm(fp128 %a, fp128 %b) "aarch64_pstate_sm_enabled" nounwind {
326338
; CHECK-COMMON-LABEL: f128_call_sm:
327339
; CHECK-COMMON: // %bb.0:
340+
; CHECK-COMMON-NEXT: cntd x9
328341
; CHECK-COMMON-NEXT: sub sp, sp, #112
329342
; CHECK-COMMON-NEXT: stp d15, d14, [sp, #32] // 16-byte Folded Spill
330343
; CHECK-COMMON-NEXT: stp d13, d12, [sp, #48] // 16-byte Folded Spill
331344
; CHECK-COMMON-NEXT: stp d11, d10, [sp, #64] // 16-byte Folded Spill
332345
; CHECK-COMMON-NEXT: stp d9, d8, [sp, #80] // 16-byte Folded Spill
333-
; CHECK-COMMON-NEXT: str x30, [sp, #96] // 8-byte Folded Spill
346+
; CHECK-COMMON-NEXT: stp x30, x9, [sp, #96] // 16-byte Folded Spill
334347
; CHECK-COMMON-NEXT: stp q1, q0, [sp] // 32-byte Folded Spill
335348
; CHECK-COMMON-NEXT: smstop sm
336349
; CHECK-COMMON-NEXT: ldp q1, q0, [sp] // 32-byte Folded Reload
@@ -386,12 +399,13 @@ define double @frem_call_za(double %a, double %b) "aarch64_inout_za" nounwind {
386399
define float @frem_call_sm(float %a, float %b) "aarch64_pstate_sm_enabled" nounwind {
387400
; CHECK-COMMON-LABEL: frem_call_sm:
388401
; CHECK-COMMON: // %bb.0:
402+
; CHECK-COMMON-NEXT: cntd x9
389403
; CHECK-COMMON-NEXT: sub sp, sp, #96
390404
; CHECK-COMMON-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
391405
; CHECK-COMMON-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
392406
; CHECK-COMMON-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
393407
; CHECK-COMMON-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
394-
; CHECK-COMMON-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
408+
; CHECK-COMMON-NEXT: stp x30, x9, [sp, #80] // 16-byte Folded Spill
395409
; CHECK-COMMON-NEXT: stp s1, s0, [sp, #8] // 8-byte Folded Spill
396410
; CHECK-COMMON-NEXT: smstop sm
397411
; CHECK-COMMON-NEXT: ldp s1, s0, [sp, #8] // 8-byte Folded Reload
@@ -414,12 +428,14 @@ define float @frem_call_sm(float %a, float %b) "aarch64_pstate_sm_enabled" nounw
414428
define float @frem_call_sm_compat(float %a, float %b) "aarch64_pstate_sm_compatible" nounwind {
415429
; CHECK-COMMON-LABEL: frem_call_sm_compat:
416430
; CHECK-COMMON: // %bb.0:
417-
; CHECK-COMMON-NEXT: sub sp, sp, #96
431+
; CHECK-COMMON-NEXT: cntd x9
432+
; CHECK-COMMON-NEXT: sub sp, sp, #112
418433
; CHECK-COMMON-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
419434
; CHECK-COMMON-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
420435
; CHECK-COMMON-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
421436
; CHECK-COMMON-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
422-
; CHECK-COMMON-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
437+
; CHECK-COMMON-NEXT: stp x30, x9, [sp, #80] // 16-byte Folded Spill
438+
; CHECK-COMMON-NEXT: str x19, [sp, #96] // 8-byte Folded Spill
423439
; CHECK-COMMON-NEXT: stp s0, s1, [sp, #8] // 8-byte Folded Spill
424440
; CHECK-COMMON-NEXT: bl __arm_sme_state
425441
; CHECK-COMMON-NEXT: ldp s2, s0, [sp, #8] // 8-byte Folded Reload
@@ -436,13 +452,14 @@ define float @frem_call_sm_compat(float %a, float %b) "aarch64_pstate_sm_compati
436452
; CHECK-COMMON-NEXT: // %bb.3:
437453
; CHECK-COMMON-NEXT: smstart sm
438454
; CHECK-COMMON-NEXT: .LBB12_4:
439-
; CHECK-COMMON-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
440-
; CHECK-COMMON-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload
441455
; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
456+
; CHECK-COMMON-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload
442457
; CHECK-COMMON-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
458+
; CHECK-COMMON-NEXT: ldr x19, [sp, #96] // 8-byte Folded Reload
459+
; CHECK-COMMON-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
443460
; CHECK-COMMON-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
444461
; CHECK-COMMON-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
445-
; CHECK-COMMON-NEXT: add sp, sp, #96
462+
; CHECK-COMMON-NEXT: add sp, sp, #112
446463
; CHECK-COMMON-NEXT: ret
447464
%res = frem float %a, %b
448465
ret float %res

llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -121,13 +121,14 @@ define float @test_lazy_save_expanded_intrinsic(float %a) nounwind "aarch64_inou
121121
define void @test_lazy_save_and_conditional_smstart() nounwind "aarch64_inout_za" "aarch64_pstate_sm_compatible" {
122122
; CHECK-LABEL: test_lazy_save_and_conditional_smstart:
123123
; CHECK: // %bb.0:
124+
; CHECK-NEXT: cntd x9
124125
; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
125126
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
126127
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
127128
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
128129
; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
129130
; CHECK-NEXT: add x29, sp, #64
130-
; CHECK-NEXT: str x19, [sp, #80] // 8-byte Folded Spill
131+
; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill
131132
; CHECK-NEXT: sub sp, sp, #16
132133
; CHECK-NEXT: rdsvl x8, #1
133134
; CHECK-NEXT: mov x9, sp
@@ -160,7 +161,7 @@ define void @test_lazy_save_and_conditional_smstart() nounwind "aarch64_inout_za
160161
; CHECK-NEXT: msr TPIDR2_EL0, xzr
161162
; CHECK-NEXT: sub sp, x29, #64
162163
; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
163-
; CHECK-NEXT: ldr x19, [sp, #80] // 8-byte Folded Reload
164+
; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload
164165
; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
165166
; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
166167
; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload

0 commit comments

Comments
 (0)