Skip to content

Commit cf55b9c

Browse files
aemersontru
authored and committed
[AArch64][Darwin][SME] Don't try to save VG to the stack for unwinding.
On Darwin we don't have any hardware that has SVE support, only SME. Therefore we don't need to save VG for unwinders and can safely omit it. This also fixes crashes that were introduced when this feature landed, because Darwin's compact unwind code can't handle the presence of VG anyway. rdar://131072344
1 parent e6bcdea commit cf55b9c

File tree

3 files changed

+189
-15
lines changed

3 files changed

+189
-15
lines changed

llvm/lib/Target/AArch64/AArch64FrameLowering.cpp

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1394,6 +1394,18 @@ bool requiresGetVGCall(MachineFunction &MF) {
13941394
!MF.getSubtarget<AArch64Subtarget>().hasSVE();
13951395
}
13961396

1397+
/// Returns true if VG should be saved to the stack for \p MF.
///
/// The result is false when the function has no streaming-mode changes.
/// Otherwise, on Darwin targets VG is saved only when the subtarget has SVE;
/// on every other target it is always saved.
static bool requiresSaveVG(MachineFunction &MF) {
1398+
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
1399+
// No streaming-mode changes: VG is not saved, regardless of target.
1400+
// (The Darwin-specific rule is applied further down.)
1401+
if (!AFI->hasStreamingModeChanges())
1402+
return false;
1403+
auto &ST = MF.getSubtarget<AArch64Subtarget>();
1404+
// For Darwin platforms we don't save VG for non-SVE functions, even if SME
// is enabled with streaming mode changes.
if (ST.isTargetDarwin())
1405+
return ST.hasSVE();
1406+
// Non-Darwin target with streaming-mode changes: always save VG.
return true;
1407+
}
1408+
13971409
bool isVGInstruction(MachineBasicBlock::iterator MBBI) {
13981410
unsigned Opc = MBBI->getOpcode();
13991411
if (Opc == AArch64::CNTD_XPiI || Opc == AArch64::RDSVLI_XI ||
@@ -1430,8 +1442,7 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
14301442
// functions, we need to do this for both the streaming and non-streaming
14311443
// vector length. Move past these instructions if necessary.
14321444
MachineFunction &MF = *MBB.getParent();
1433-
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
1434-
if (AFI->hasStreamingModeChanges())
1445+
if (requiresSaveVG(MF))
14351446
while (isVGInstruction(MBBI))
14361447
++MBBI;
14371448

@@ -1937,7 +1948,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
19371948
while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup) &&
19381949
!IsSVECalleeSave(MBBI)) {
19391950
// Move past instructions generated to calculate VG
1940-
if (AFI->hasStreamingModeChanges())
1951+
if (requiresSaveVG(MF))
19411952
while (isVGInstruction(MBBI))
19421953
++MBBI;
19431954

@@ -3720,7 +3731,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
37203731
// non-streaming VG value.
37213732
const Function &F = MF.getFunction();
37223733
SMEAttrs Attrs(F);
3723-
if (AFI->hasStreamingModeChanges()) {
3734+
if (requiresSaveVG(MF)) {
37243735
if (Attrs.hasStreamingBody() && !Attrs.hasStreamingInterface())
37253736
CSStackSize += 16;
37263737
else
@@ -3873,7 +3884,7 @@ bool AArch64FrameLowering::assignCalleeSavedSpillSlots(
38733884
}
38743885

38753886
// Insert VG into the list of CSRs, immediately before LR if saved.
3876-
if (AFI->hasStreamingModeChanges()) {
3887+
if (requiresSaveVG(MF)) {
38773888
std::vector<CalleeSavedInfo> VGSaves;
38783889
SMEAttrs Attrs(MF.getFunction());
38793890

@@ -4602,10 +4613,9 @@ MachineBasicBlock::iterator emitVGSaveRestore(MachineBasicBlock::iterator II,
46024613

46034614
void AArch64FrameLowering::processFunctionBeforeFrameIndicesReplaced(
46044615
MachineFunction &MF, RegScavenger *RS = nullptr) const {
4605-
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
46064616
for (auto &BB : MF)
46074617
for (MachineBasicBlock::iterator II = BB.begin(); II != BB.end();) {
4608-
if (AFI->hasStreamingModeChanges())
4618+
if (requiresSaveVG(MF))
46094619
II = emitVGSaveRestore(II, this);
46104620
if (StackTaggingMergeSetTag)
46114621
II = tryMergeAdjacentSTG(II, this, RS);

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8732,10 +8732,11 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
87328732

87338733
SDValue InGlue;
87348734
if (RequiresSMChange) {
8735-
8736-
Chain = DAG.getNode(AArch64ISD::VG_SAVE, DL,
8737-
DAG.getVTList(MVT::Other, MVT::Glue), Chain);
8738-
InGlue = Chain.getValue(1);
8735+
if (!Subtarget->isTargetDarwin() || Subtarget->hasSVE()) {
8736+
Chain = DAG.getNode(AArch64ISD::VG_SAVE, DL,
8737+
DAG.getVTList(MVT::Other, MVT::Glue), Chain);
8738+
InGlue = Chain.getValue(1);
8739+
}
87398740

87408741
SDValue NewChain = changeStreamingMode(
87418742
DAG, DL, CalleeAttrs.hasStreamingInterface(), Chain, InGlue,
@@ -8914,11 +8915,13 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
89148915
Result = changeStreamingMode(
89158916
DAG, DL, !CalleeAttrs.hasStreamingInterface(), Result, InGlue,
89168917
getSMCondition(CallerAttrs, CalleeAttrs), PStateSM);
8917-
InGlue = Result.getValue(1);
89188918

8919-
Result =
8920-
DAG.getNode(AArch64ISD::VG_RESTORE, DL,
8921-
DAG.getVTList(MVT::Other, MVT::Glue), {Result, InGlue});
8919+
if (!Subtarget->isTargetDarwin() || Subtarget->hasSVE()) {
8920+
InGlue = Result.getValue(1);
8921+
Result =
8922+
DAG.getNode(AArch64ISD::VG_RESTORE, DL,
8923+
DAG.getVTList(MVT::Other, MVT::Glue), {Result, InGlue});
8924+
}
89228925
}
89238926

89248927
if (CallerAttrs.requiresEnablingZAAfterCall(CalleeAttrs))
Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -o - %s | FileCheck %s
3+
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128-Fn32"
4+
target triple = "arm64-apple-macosx14.0.0"
5+
6+
; Check we don't crash on Darwin and that we don't try to save VG
7+
; when only SME (and not SVE) is enabled.
8+
9+
; Function Attrs: mustprogress norecurse nounwind ssp uwtable(sync)
10+
; @main carries no streaming attribute (attributes #0) and makes a call marked
; "aarch64_pstate_sm_enabled" (attributes #4). The expected code wraps that
; call in smstart/smstop and spills only d8-d15 and x29/x30: the CHECK lines
; contain no VG computation, no VG spill slot and no VG CFI entry.
define noundef i32 @main() local_unnamed_addr #0 {
11+
; CHECK-LABEL: main:
12+
; CHECK: ; %bb.0: ; %entry
13+
; CHECK-NEXT: stp d15, d14, [sp, #-80]! ; 16-byte Folded Spill
14+
; CHECK-NEXT: .cfi_def_cfa_offset 80
15+
; CHECK-NEXT: stp d13, d12, [sp, #16] ; 16-byte Folded Spill
16+
; CHECK-NEXT: stp d11, d10, [sp, #32] ; 16-byte Folded Spill
17+
; CHECK-NEXT: stp d9, d8, [sp, #48] ; 16-byte Folded Spill
18+
; CHECK-NEXT: stp x29, x30, [sp, #64] ; 16-byte Folded Spill
19+
; CHECK-NEXT: .cfi_offset w30, -8
20+
; CHECK-NEXT: .cfi_offset w29, -16
21+
; CHECK-NEXT: .cfi_offset b8, -24
22+
; CHECK-NEXT: .cfi_offset b9, -32
23+
; CHECK-NEXT: .cfi_offset b10, -40
24+
; CHECK-NEXT: .cfi_offset b11, -48
25+
; CHECK-NEXT: .cfi_offset b12, -56
26+
; CHECK-NEXT: .cfi_offset b13, -64
27+
; CHECK-NEXT: .cfi_offset b14, -72
28+
; CHECK-NEXT: .cfi_offset b15, -80
29+
; CHECK-NEXT: smstart sm
30+
; CHECK-NEXT: bl __ZL9sme_crashv
31+
; CHECK-NEXT: smstop sm
32+
; CHECK-NEXT: mov w0, #0 ; =0x0
33+
; CHECK-NEXT: ldp x29, x30, [sp, #64] ; 16-byte Folded Reload
34+
; CHECK-NEXT: ldp d9, d8, [sp, #48] ; 16-byte Folded Reload
35+
; CHECK-NEXT: ldp d11, d10, [sp, #32] ; 16-byte Folded Reload
36+
; CHECK-NEXT: ldp d13, d12, [sp, #16] ; 16-byte Folded Reload
37+
; CHECK-NEXT: ldp d15, d14, [sp], #80 ; 16-byte Folded Reload
38+
; CHECK-NEXT: .cfi_def_cfa_offset 0
39+
; CHECK-NEXT: .cfi_restore w30
40+
; CHECK-NEXT: .cfi_restore w29
41+
; CHECK-NEXT: .cfi_restore b8
42+
; CHECK-NEXT: .cfi_restore b9
43+
; CHECK-NEXT: .cfi_restore b10
44+
; CHECK-NEXT: .cfi_restore b11
45+
; CHECK-NEXT: .cfi_restore b12
46+
; CHECK-NEXT: .cfi_restore b13
47+
; CHECK-NEXT: .cfi_restore b14
48+
; CHECK-NEXT: .cfi_restore b15
49+
; CHECK-NEXT: ret
50+
entry:
51+
tail call fastcc void @_ZL9sme_crashv() #4
52+
ret i32 0
53+
}
54+
55+
; Function Attrs: mustprogress norecurse nounwind ssp uwtable(sync)
56+
; @_ZL9sme_crashv is marked "aarch64_pstate_sm_enabled" and ssp (attributes
; #1) and allocates a 256-byte-aligned buffer. The expected code realigns the
; stack through x9, wraps the ___stack_chk_fail call in smstop/smstart, and
; contains no VG computation, no VG spill slot and no VG CFI entry.
define internal fastcc void @_ZL9sme_crashv() unnamed_addr #1 {
57+
; CHECK-LABEL: _ZL9sme_crashv:
58+
; CHECK: ; %bb.0: ; %entry
59+
; CHECK-NEXT: stp d15, d14, [sp, #-96]! ; 16-byte Folded Spill
60+
; CHECK-NEXT: .cfi_def_cfa_offset 96
61+
; CHECK-NEXT: stp d13, d12, [sp, #16] ; 16-byte Folded Spill
62+
; CHECK-NEXT: stp d11, d10, [sp, #32] ; 16-byte Folded Spill
63+
; CHECK-NEXT: stp d9, d8, [sp, #48] ; 16-byte Folded Spill
64+
; CHECK-NEXT: stp x28, x27, [sp, #64] ; 16-byte Folded Spill
65+
; CHECK-NEXT: stp x29, x30, [sp, #80] ; 16-byte Folded Spill
66+
; CHECK-NEXT: add x29, sp, #80
67+
; CHECK-NEXT: .cfi_def_cfa w29, 16
68+
; CHECK-NEXT: .cfi_offset w30, -8
69+
; CHECK-NEXT: .cfi_offset w29, -16
70+
; CHECK-NEXT: .cfi_offset w27, -24
71+
; CHECK-NEXT: .cfi_offset w28, -32
72+
; CHECK-NEXT: .cfi_offset b8, -40
73+
; CHECK-NEXT: .cfi_offset b9, -48
74+
; CHECK-NEXT: .cfi_offset b10, -56
75+
; CHECK-NEXT: .cfi_offset b11, -64
76+
; CHECK-NEXT: .cfi_offset b12, -72
77+
; CHECK-NEXT: .cfi_offset b13, -80
78+
; CHECK-NEXT: .cfi_offset b14, -88
79+
; CHECK-NEXT: .cfi_offset b15, -96
80+
; CHECK-NEXT: .cfi_remember_state
81+
; CHECK-NEXT: sub x9, sp, #160
82+
; CHECK-NEXT: and sp, x9, #0xffffffffffffff00
83+
; CHECK-NEXT: Lloh0:
84+
; CHECK-NEXT: adrp x8, ___stack_chk_guard@GOTPAGE
85+
; CHECK-NEXT: Lloh1:
86+
; CHECK-NEXT: ldr x8, [x8, ___stack_chk_guard@GOTPAGEOFF]
87+
; CHECK-NEXT: Lloh2:
88+
; CHECK-NEXT: ldr x8, [x8]
89+
; CHECK-NEXT: str x8, [sp, #152]
90+
; CHECK-NEXT: mov z0.b, #0 ; =0x0
91+
; CHECK-NEXT: stp q0, q0, [sp, #32]
92+
; CHECK-NEXT: stp q0, q0, [sp]
93+
; CHECK-NEXT: mov x8, sp
94+
; CHECK-NEXT: ; InlineAsm Start
95+
; CHECK-NEXT: ptrue p0.s
96+
; CHECK-NEXT: st1w { z0.s }, p0, [x8]
97+
; CHECK-EMPTY:
98+
; CHECK-NEXT: ; InlineAsm End
99+
; CHECK-NEXT: ldr x8, [sp, #152]
100+
; CHECK-NEXT: Lloh3:
101+
; CHECK-NEXT: adrp x9, ___stack_chk_guard@GOTPAGE
102+
; CHECK-NEXT: Lloh4:
103+
; CHECK-NEXT: ldr x9, [x9, ___stack_chk_guard@GOTPAGEOFF]
104+
; CHECK-NEXT: Lloh5:
105+
; CHECK-NEXT: ldr x9, [x9]
106+
; CHECK-NEXT: cmp x9, x8
107+
; CHECK-NEXT: b.ne LBB1_2
108+
; CHECK-NEXT: ; %bb.1: ; %entry
109+
; CHECK-NEXT: sub sp, x29, #80
110+
; CHECK-NEXT: .cfi_def_cfa wsp, 96
111+
; CHECK-NEXT: ldp x29, x30, [sp, #80] ; 16-byte Folded Reload
112+
; CHECK-NEXT: ldp x28, x27, [sp, #64] ; 16-byte Folded Reload
113+
; CHECK-NEXT: ldp d9, d8, [sp, #48] ; 16-byte Folded Reload
114+
; CHECK-NEXT: ldp d11, d10, [sp, #32] ; 16-byte Folded Reload
115+
; CHECK-NEXT: ldp d13, d12, [sp, #16] ; 16-byte Folded Reload
116+
; CHECK-NEXT: ldp d15, d14, [sp], #96 ; 16-byte Folded Reload
117+
; CHECK-NEXT: .cfi_def_cfa_offset 0
118+
; CHECK-NEXT: .cfi_restore w30
119+
; CHECK-NEXT: .cfi_restore w29
120+
; CHECK-NEXT: .cfi_restore w27
121+
; CHECK-NEXT: .cfi_restore w28
122+
; CHECK-NEXT: .cfi_restore b8
123+
; CHECK-NEXT: .cfi_restore b9
124+
; CHECK-NEXT: .cfi_restore b10
125+
; CHECK-NEXT: .cfi_restore b11
126+
; CHECK-NEXT: .cfi_restore b12
127+
; CHECK-NEXT: .cfi_restore b13
128+
; CHECK-NEXT: .cfi_restore b14
129+
; CHECK-NEXT: .cfi_restore b15
130+
; CHECK-NEXT: ret
131+
; CHECK-NEXT: LBB1_2: ; %entry
132+
; CHECK-NEXT: .cfi_restore_state
133+
; CHECK-NEXT: smstop sm
134+
; CHECK-NEXT: bl ___stack_chk_fail
135+
; CHECK-NEXT: smstart sm
136+
; CHECK-NEXT: .loh AdrpLdrGotLdr Lloh3, Lloh4, Lloh5
137+
; CHECK-NEXT: .loh AdrpLdrGotLdr Lloh0, Lloh1, Lloh2
138+
entry:
139+
%uu = alloca [16 x float], align 256
140+
call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %uu) #5
141+
call void @llvm.memset.p0.i64(ptr noundef nonnull align 256 dereferenceable(64) %uu, i8 0, i64 64, i1 false)
142+
call void asm sideeffect "ptrue p0.s\0Ast1w { z0.s }, p0, [$0]\0A", "r"(ptr nonnull %uu) #5
143+
call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %uu) #5
144+
ret void
145+
}
146+
147+
; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
148+
declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #2
149+
150+
; Function Attrs: mustprogress nocallback nofree nounwind willreturn memory(argmem: write)
151+
declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #3
152+
153+
; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
154+
declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #2
155+
156+
attributes #0 = { mustprogress norecurse nounwind ssp uwtable(sync) "stack-protector-buffer-size"="8" "target-cpu"="apple-a16" "target-features"="+sme,+sme-f64f64,+sme2" }
157+
attributes #1 = { mustprogress norecurse nounwind ssp uwtable(sync) "aarch64_pstate_sm_enabled" "stack-protector-buffer-size"="8" "target-cpu"="apple-a16" "target-features"="+sme,+sme-f64f64,+sme2" }
158+
attributes #2 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
159+
attributes #3 = { mustprogress nocallback nofree nounwind willreturn memory(argmem: write) }
160+
attributes #4 = { "aarch64_pstate_sm_enabled" "no-builtin-calloc" "no-builtin-stpcpy" }
161+
attributes #5 = { nounwind }

0 commit comments

Comments
 (0)