Skip to content

Commit ca7dc89

Browse files
[AArch64][SME] Remove unused ZA lazy-save (#81648)
This patch removes the TPIDR2 lazy-save object and buffer if no lazy save is required. --------- Co-authored-by: Samuel Tebbs <[email protected]>
1 parent 7ea203b commit ca7dc89

10 files changed

+323
-174
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 110 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -2492,6 +2492,8 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
24922492
switch ((AArch64ISD::NodeType)Opcode) {
24932493
case AArch64ISD::FIRST_NUMBER:
24942494
break;
2495+
MAKE_CASE(AArch64ISD::ALLOCATE_ZA_BUFFER)
2496+
MAKE_CASE(AArch64ISD::INIT_TPIDR2OBJ)
24952497
MAKE_CASE(AArch64ISD::COALESCER_BARRIER)
24962498
MAKE_CASE(AArch64ISD::VG_SAVE)
24972499
MAKE_CASE(AArch64ISD::VG_RESTORE)
@@ -2991,6 +2993,80 @@ AArch64TargetLowering::EmitZero(MachineInstr &MI, MachineBasicBlock *BB) const {
29912993
return BB;
29922994
}
29932995

2996+
/// Custom-inserter expansion of the InitTPIDR2Obj pseudo.
///
/// If at least one lazy save was recorded for this function
/// (TPIDR2.Uses > 0), initialise the TPIDR2 stack object: store the buffer
/// pointer held in operand 0 at offset 0 and zero the reserved bytes 10-15.
/// Otherwise nothing references the object, so it is removed from the frame.
/// In both cases the pseudo itself is removed from \p BB.
///
/// \param MI the InitTPIDR2Obj pseudo; operand 0 is the buffer pointer.
/// \param BB the block containing \p MI.
/// \returns \p BB; no new blocks are created.
MachineBasicBlock *
2997+
AArch64TargetLowering::EmitInitTPIDR2Object(MachineInstr &MI,
2998+
MachineBasicBlock *BB) const {
2999+
MachineFunction *MF = BB->getParent();
3000+
MachineFrameInfo &MFI = MF->getFrameInfo();
3001+
AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>();
3002+
TPIDR2Object &TPIDR2 = FuncInfo->getTPIDR2Obj();
3003+
// Uses is incremented by LowerCall each time it emits a lazy-save restore, so
// a non-zero count means the object is actually referenced.
if (TPIDR2.Uses > 0) {
3004+
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
3005+
// Store the buffer pointer to the TPIDR2 stack object.
3006+
BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::STRXui))
3007+
.addReg(MI.getOperand(0).getReg())
3008+
.addFrameIndex(TPIDR2.FrameIndex)
3009+
.addImm(0);
3010+
// Set the reserved bytes (10-15) to zero. The store immediates are scaled by
// the access size: halfword index 5 is byte offset 10 (bytes 10-11) and word
// index 3 is byte offset 12 (bytes 12-15).
3011+
BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::STRHHui))
3012+
.addReg(AArch64::WZR)
3013+
.addFrameIndex(TPIDR2.FrameIndex)
3014+
.addImm(5);
3015+
BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::STRWui))
3016+
.addReg(AArch64::WZR)
3017+
.addFrameIndex(TPIDR2.FrameIndex)
3018+
.addImm(3);
3019+
} else
3020+
// No lazy save uses the object, so drop it from the frame.
MFI.RemoveStackObject(TPIDR2.FrameIndex);
3021+
3022+
// The pseudo has been fully expanded (or found to be unnecessary).
BB->remove_instr(&MI);
3023+
return BB;
3024+
}
3025+
3026+
/// Custom-inserter expansion of the AllocateZABuffer pseudo.
///
/// If at least one lazy save was recorded for this function
/// (TPIDR2.Uses > 0), grow the stack by Size * Size bytes, where Size is
/// operand 1, and define operand 0 as the new stack pointer, i.e. the address
/// of the lazy-save buffer. If there are no uses, no code is emitted. In both
/// cases the pseudo itself is removed from \p BB.
///
/// \param MI the AllocateZABuffer pseudo; operand 0 is the result register,
///           operand 1 the size register.
/// \param BB the block containing \p MI.
/// \returns \p BB; no new blocks are created.
MachineBasicBlock *
3027+
AArch64TargetLowering::EmitAllocateZABuffer(MachineInstr &MI,
3028+
MachineBasicBlock *BB) const {
3029+
MachineFunction *MF = BB->getParent();
3030+
MachineFrameInfo &MFI = MF->getFrameInfo();
3031+
AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>();
3032+
// TODO This function grows the stack with a subtraction, which doesn't work
3033+
// on Windows. Some refactoring to share the functionality in
3034+
// LowerWindowsDYNAMIC_STACKALLOC will be required once the Windows ABI
3035+
// supports SME
3036+
assert(!MF->getSubtarget<AArch64Subtarget>().isTargetWindows() &&
3037+
"Lazy ZA save is not yet supported on Windows");
3038+
3039+
TPIDR2Object &TPIDR2 = FuncInfo->getTPIDR2Obj();
3040+
3041+
// Uses is incremented by LowerCall each time it emits a lazy-save restore;
// when it is zero the buffer would never be read, so nothing is allocated.
// NOTE(review): in that case operand 0 is left without a definition; this
// relies on its consumer (the InitTPIDR2Obj expansion) also emitting nothing
// when Uses is zero -- confirm no other user of the result exists.
if (TPIDR2.Uses > 0) {
3042+
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
3043+
MachineRegisterInfo &MRI = MF->getRegInfo();
3044+
3045+
// The MSUBXrrr below won't always be emitted in a form that accepts SP
3046+
// directly, so read SP through a virtual register first.
3047+
Register SP = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3048+
BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(TargetOpcode::COPY), SP)
3049+
.addReg(AArch64::SP);
3050+
3051+
// Allocate a lazy-save buffer of Size * Size bytes: Dest = SP - Size * Size.
// LowerFormalArguments passes SVL (RDSVL #1) as Size, giving SVL * SVL.
3052+
auto Size = MI.getOperand(1).getReg();
3053+
auto Dest = MI.getOperand(0).getReg();
3054+
BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::MSUBXrrr), Dest)
3055+
.addReg(Size)
3056+
.addReg(Size)
3057+
.addReg(SP);
3058+
BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(TargetOpcode::COPY),
3059+
AArch64::SP)
3060+
.addReg(Dest);
3061+
3062+
// We have just allocated a variable sized object, tell this to PEI.
3063+
MFI.CreateVariableSizedObject(Align(16), nullptr);
3064+
}
3065+
3066+
// The pseudo has been fully expanded (or found to be unnecessary).
BB->remove_instr(&MI);
3067+
return BB;
3068+
}
3069+
29943070
MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
29953071
MachineInstr &MI, MachineBasicBlock *BB) const {
29963072

@@ -3021,7 +3097,10 @@ MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
30213097
MI.dump();
30223098
#endif
30233099
llvm_unreachable("Unexpected instruction for custom inserter!");
3024-
3100+
case AArch64::InitTPIDR2Obj:
3101+
return EmitInitTPIDR2Object(MI, BB);
3102+
case AArch64::AllocateZABuffer:
3103+
return EmitAllocateZABuffer(MI, BB);
30253104
case AArch64::F128CSEL:
30263105
return EmitF128CSEL(MI, BB);
30273106
case TargetOpcode::STATEPOINT:
@@ -7029,47 +7108,6 @@ AArch64TargetLowering::CCAssignFnForReturn(CallingConv::ID CC) const {
70297108
}
70307109
}
70317110

7032-
7033-
unsigned
7034-
AArch64TargetLowering::allocateLazySaveBuffer(SDValue &Chain, const SDLoc &DL,
7035-
SelectionDAG &DAG) const {
7036-
MachineFunction &MF = DAG.getMachineFunction();
7037-
MachineFrameInfo &MFI = MF.getFrameInfo();
7038-
7039-
// Allocate a lazy-save buffer object of size SVL.B * SVL.B (worst-case)
7040-
SDValue N = DAG.getNode(AArch64ISD::RDSVL, DL, MVT::i64,
7041-
DAG.getConstant(1, DL, MVT::i32));
7042-
SDValue NN = DAG.getNode(ISD::MUL, DL, MVT::i64, N, N);
7043-
SDValue Ops[] = {Chain, NN, DAG.getConstant(1, DL, MVT::i64)};
7044-
SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
7045-
SDValue Buffer = DAG.getNode(ISD::DYNAMIC_STACKALLOC, DL, VTs, Ops);
7046-
Chain = Buffer.getValue(1);
7047-
MFI.CreateVariableSizedObject(Align(1), nullptr);
7048-
7049-
// Allocate an additional TPIDR2 object on the stack (16 bytes)
7050-
unsigned TPIDR2Obj = MFI.CreateStackObject(16, Align(16), false);
7051-
7052-
// Store the buffer pointer to the TPIDR2 stack object.
7053-
MachinePointerInfo MPI = MachinePointerInfo::getStack(MF, TPIDR2Obj);
7054-
SDValue Ptr = DAG.getFrameIndex(
7055-
TPIDR2Obj,
7056-
DAG.getTargetLoweringInfo().getFrameIndexTy(DAG.getDataLayout()));
7057-
Chain = DAG.getStore(Chain, DL, Buffer, Ptr, MPI);
7058-
7059-
// Set the reserved bytes (10-15) to zero
7060-
EVT PtrTy = Ptr.getValueType();
7061-
SDValue ReservedPtr =
7062-
DAG.getNode(ISD::ADD, DL, PtrTy, Ptr, DAG.getConstant(10, DL, PtrTy));
7063-
Chain = DAG.getStore(Chain, DL, DAG.getConstant(0, DL, MVT::i16), ReservedPtr,
7064-
MPI);
7065-
ReservedPtr =
7066-
DAG.getNode(ISD::ADD, DL, PtrTy, Ptr, DAG.getConstant(12, DL, PtrTy));
7067-
Chain = DAG.getStore(Chain, DL, DAG.getConstant(0, DL, MVT::i32), ReservedPtr,
7068-
MPI);
7069-
7070-
return TPIDR2Obj;
7071-
}
7072-
70737111
static bool isPassedInFPR(EVT VT) {
70747112
return VT.isFixedLengthVector() ||
70757113
(VT.isFloatingPoint() && !VT.isScalableVector());
@@ -7485,10 +7523,28 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
74857523
if (Subtarget->hasCustomCallingConv())
74867524
Subtarget->getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);
74877525

7488-
// Conservatively assume the function requires the lazy-save mechanism.
7526+
// Create a 16 Byte TPIDR2 object. The dynamic buffer
7527+
// will be expanded and stored in the static object later using a pseudonode.
74897528
if (SMEAttrs(MF.getFunction()).hasZAState()) {
7490-
unsigned TPIDR2Obj = allocateLazySaveBuffer(Chain, DL, DAG);
7491-
FuncInfo->setLazySaveTPIDR2Obj(TPIDR2Obj);
7529+
TPIDR2Object &TPIDR2 = FuncInfo->getTPIDR2Obj();
7530+
TPIDR2.FrameIndex = MFI.CreateStackObject(16, Align(16), false);
7531+
SDValue SVL = DAG.getNode(AArch64ISD::RDSVL, DL, MVT::i64,
7532+
DAG.getConstant(1, DL, MVT::i32));
7533+
7534+
SDValue Buffer;
7535+
if (!Subtarget->isTargetWindows() && !hasInlineStackProbe(MF)) {
7536+
Buffer = DAG.getNode(AArch64ISD::ALLOCATE_ZA_BUFFER, DL,
7537+
DAG.getVTList(MVT::i64, MVT::Other), {Chain, SVL});
7538+
} else {
7539+
SDValue Size = DAG.getNode(ISD::MUL, DL, MVT::i64, SVL, SVL);
7540+
Buffer = DAG.getNode(ISD::DYNAMIC_STACKALLOC, DL,
7541+
DAG.getVTList(MVT::i64, MVT::Other),
7542+
{Chain, Size, DAG.getConstant(1, DL, MVT::i64)});
7543+
MFI.CreateVariableSizedObject(Align(16), nullptr);
7544+
}
7545+
Chain = DAG.getNode(
7546+
AArch64ISD::INIT_TPIDR2OBJ, DL, DAG.getVTList(MVT::Other),
7547+
{/*Chain*/ Buffer.getValue(1), /*Buffer ptr*/ Buffer.getValue(0)});
74927548
}
74937549

74947550
if (CallConv == CallingConv::PreserveNone) {
@@ -8174,9 +8230,11 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
81748230

81758231
bool RequiresLazySave = CallerAttrs.requiresLazySave(CalleeAttrs);
81768232
if (RequiresLazySave) {
8177-
unsigned TPIDR2Obj = FuncInfo->getLazySaveTPIDR2Obj();
8178-
MachinePointerInfo MPI = MachinePointerInfo::getStack(MF, TPIDR2Obj);
8179-
SDValue TPIDR2ObjAddr = DAG.getFrameIndex(TPIDR2Obj,
8233+
const TPIDR2Object &TPIDR2 = FuncInfo->getTPIDR2Obj();
8234+
MachinePointerInfo MPI =
8235+
MachinePointerInfo::getStack(MF, TPIDR2.FrameIndex);
8236+
SDValue TPIDR2ObjAddr = DAG.getFrameIndex(
8237+
TPIDR2.FrameIndex,
81808238
DAG.getTargetLoweringInfo().getFrameIndexTy(DAG.getDataLayout()));
81818239
SDValue NumZaSaveSlicesAddr =
81828240
DAG.getNode(ISD::ADD, DL, TPIDR2ObjAddr.getValueType(), TPIDR2ObjAddr,
@@ -8719,7 +8777,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
87198777

87208778
if (RequiresLazySave) {
87218779
// Conditionally restore the lazy save using a pseudo node.
8722-
unsigned FI = FuncInfo->getLazySaveTPIDR2Obj();
8780+
TPIDR2Object &TPIDR2 = FuncInfo->getTPIDR2Obj();
87238781
SDValue RegMask = DAG.getRegisterMask(
87248782
TRI->SMEABISupportRoutinesCallPreservedMaskFromX0());
87258783
SDValue RestoreRoutine = DAG.getTargetExternalSymbol(
@@ -8732,7 +8790,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
87328790
// RESTORE_ZA pseudo.
87338791
SDValue Glue;
87348792
SDValue TPIDR2Block = DAG.getFrameIndex(
8735-
FI, DAG.getTargetLoweringInfo().getFrameIndexTy(DAG.getDataLayout()));
8793+
TPIDR2.FrameIndex,
8794+
DAG.getTargetLoweringInfo().getFrameIndexTy(DAG.getDataLayout()));
87368795
Result = DAG.getCopyToReg(Result, DL, AArch64::X0, TPIDR2Block, Glue);
87378796
Result =
87388797
DAG.getNode(AArch64ISD::RESTORE_ZA, DL, MVT::Other,
@@ -8744,6 +8803,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
87448803
ISD::INTRINSIC_VOID, DL, MVT::Other, Result,
87458804
DAG.getConstant(Intrinsic::aarch64_sme_set_tpidr2, DL, MVT::i32),
87468805
DAG.getConstant(0, DL, MVT::i64));
8806+
TPIDR2.Uses++;
87478807
}
87488808

87498809
if (RequiresSMChange || RequiresLazySave || ShouldPreserveZT0) {

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -457,6 +457,8 @@ enum NodeType : unsigned {
457457
// SME
458458
RDSVL,
459459
REVD_MERGE_PASSTHRU,
460+
ALLOCATE_ZA_BUFFER,
461+
INIT_TPIDR2OBJ,
460462

461463
// Asserts that a function argument (i32) is zero-extended to i8 by
462464
// the caller
@@ -658,6 +660,10 @@ class AArch64TargetLowering : public TargetLowering {
658660
MachineBasicBlock *EmitZTInstr(MachineInstr &MI, MachineBasicBlock *BB,
659661
unsigned Opcode, bool Op0IsDef) const;
660662
MachineBasicBlock *EmitZero(MachineInstr &MI, MachineBasicBlock *BB) const;
663+
MachineBasicBlock *EmitInitTPIDR2Object(MachineInstr &MI,
664+
MachineBasicBlock *BB) const;
665+
MachineBasicBlock *EmitAllocateZABuffer(MachineInstr &MI,
666+
MachineBasicBlock *BB) const;
661667

662668
MachineBasicBlock *
663669
EmitInstrWithCustomInserter(MachineInstr &MI,
@@ -1037,9 +1043,6 @@ class AArch64TargetLowering : public TargetLowering {
10371043

10381044
bool shouldExpandBuildVectorWithShuffles(EVT, unsigned) const override;
10391045

1040-
unsigned allocateLazySaveBuffer(SDValue &Chain, const SDLoc &DL,
1041-
SelectionDAG &DAG) const;
1042-
10431046
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
10441047
bool isVarArg,
10451048
const SmallVectorImpl<ISD::InputArg> &Ins,

llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,11 @@ struct AArch64FunctionInfo;
3636
class AArch64Subtarget;
3737
class MachineInstr;
3838

39+
/// Per-function bookkeeping for the TPIDR2 stack object used by the SME
/// lazy-save mechanism.
struct TPIDR2Object {
40+
/// Frame index of the 16-byte TPIDR2 stack object. Defaults to INT_MAX until
/// LowerFormalArguments creates the object and assigns the real index.
int FrameIndex = std::numeric_limits<int>::max();
41+
/// Number of lazy saves emitted for this function; incremented by LowerCall.
/// The custom inserters only materialise the buffer and the object when this
/// is non-zero.
unsigned Uses = 0;
42+
};
43+
3944
/// AArch64FunctionInfo - This class is derived from MachineFunctionInfo and
4045
/// contains private AArch64-specific information for each MachineFunction.
4146
class AArch64FunctionInfo final : public MachineFunctionInfo {
@@ -196,7 +201,7 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
196201
bool IsSVECC = false;
197202

198203
/// The TPIDR2 object used for lazy saves: its stack frame index and the number of lazy saves that use it.
199-
Register LazySaveTPIDR2Obj = 0;
204+
TPIDR2Object TPIDR2;
200205

201206
/// Whether this function changes streaming mode within the function.
202207
bool HasStreamingModeChanges = false;
@@ -248,8 +253,7 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
248253
bool isSVECC() const { return IsSVECC; };
249254
void setIsSVECC(bool s) { IsSVECC = s; };
250255

251-
unsigned getLazySaveTPIDR2Obj() const { return LazySaveTPIDR2Obj; }
252-
void setLazySaveTPIDR2Obj(unsigned Reg) { LazySaveTPIDR2Obj = Reg; }
256+
TPIDR2Object &getTPIDR2Obj() { return TPIDR2; }
253257

254258
void initializeBaseYamlFields(const yaml::AArch64FunctionInfo &YamlMFI);
255259

llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,21 @@ def AArch64VGSave : SDNode<"AArch64ISD::VG_SAVE", SDTypeProfile<0, 0, []>,
3737
def AArch64VGRestore : SDNode<"AArch64ISD::VG_RESTORE", SDTypeProfile<0, 0, []>,
3838
[SDNPHasChain, SDNPSideEffect, SDNPOptInGlue, SDNPOutGlue]>;
3939

40+
def AArch64AllocateZABuffer : SDNode<"AArch64ISD::ALLOCATE_ZA_BUFFER", SDTypeProfile<1, 1,
41+
[SDTCisInt<0>, SDTCisInt<1>]>,
42+
[SDNPHasChain, SDNPSideEffect]>;
43+
let usesCustomInserter = 1, Defs = [SP], Uses = [SP] in {
44+
def AllocateZABuffer : Pseudo<(outs GPR64sp:$dst), (ins GPR64:$size), []>, Sched<[WriteI]> {}
45+
}
46+
def : Pat<(i64 (AArch64AllocateZABuffer GPR64:$size)),
47+
(AllocateZABuffer $size)>;
48+
49+
def AArch64InitTPIDR2Obj : SDNode<"AArch64ISD::INIT_TPIDR2OBJ", SDTypeProfile<0, 1,
50+
[SDTCisInt<0>]>, [SDNPHasChain, SDNPMayStore]>;
51+
let usesCustomInserter = 1 in {
52+
def InitTPIDR2Obj : Pseudo<(outs), (ins GPR64:$buffer), [(AArch64InitTPIDR2Obj GPR64:$buffer)]>, Sched<[WriteI]> {}
53+
}
54+
4055
//===----------------------------------------------------------------------===//
4156
// Instruction naming conventions.
4257
//===----------------------------------------------------------------------===//

llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll

Lines changed: 10 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -232,17 +232,8 @@ declare double @za_shared_callee(double) "aarch64_inout_za"
232232
define double @za_new_caller_to_za_shared_callee(double %x) nounwind noinline optnone "aarch64_new_za"{
233233
; CHECK-COMMON-LABEL: za_new_caller_to_za_shared_callee:
234234
; CHECK-COMMON: // %bb.0: // %prelude
235-
; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
236-
; CHECK-COMMON-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
237-
; CHECK-COMMON-NEXT: mov x29, sp
238-
; CHECK-COMMON-NEXT: sub sp, sp, #16
235+
; CHECK-COMMON-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
239236
; CHECK-COMMON-NEXT: rdsvl x8, #1
240-
; CHECK-COMMON-NEXT: mov x9, sp
241-
; CHECK-COMMON-NEXT: msub x8, x8, x8, x9
242-
; CHECK-COMMON-NEXT: mov sp, x8
243-
; CHECK-COMMON-NEXT: stur x8, [x29, #-16]
244-
; CHECK-COMMON-NEXT: sturh wzr, [x29, #-6]
245-
; CHECK-COMMON-NEXT: stur wzr, [x29, #-4]
246237
; CHECK-COMMON-NEXT: mrs x8, TPIDR2_EL0
247238
; CHECK-COMMON-NEXT: cbz x8, .LBB6_2
248239
; CHECK-COMMON-NEXT: b .LBB6_1
@@ -258,9 +249,7 @@ define double @za_new_caller_to_za_shared_callee(double %x) nounwind noinline o
258249
; CHECK-COMMON-NEXT: fmov d1, x8
259250
; CHECK-COMMON-NEXT: fadd d0, d0, d1
260251
; CHECK-COMMON-NEXT: smstop za
261-
; CHECK-COMMON-NEXT: mov sp, x29
262-
; CHECK-COMMON-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
263-
; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
252+
; CHECK-COMMON-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
264253
; CHECK-COMMON-NEXT: ret
265254
entry:
266255
%call = call double @za_shared_callee(double %x)
@@ -321,12 +310,12 @@ define fp128 @f128_call_za(fp128 %a, fp128 %b) "aarch64_inout_za" nounwind {
321310
; CHECK-COMMON-NEXT: mov x9, sp
322311
; CHECK-COMMON-NEXT: msub x9, x8, x8, x9
323312
; CHECK-COMMON-NEXT: mov sp, x9
324-
; CHECK-COMMON-NEXT: sub x10, x29, #16
325-
; CHECK-COMMON-NEXT: stur wzr, [x29, #-4]
326-
; CHECK-COMMON-NEXT: sturh wzr, [x29, #-6]
327313
; CHECK-COMMON-NEXT: stur x9, [x29, #-16]
314+
; CHECK-COMMON-NEXT: sub x9, x29, #16
315+
; CHECK-COMMON-NEXT: sturh wzr, [x29, #-6]
316+
; CHECK-COMMON-NEXT: stur wzr, [x29, #-4]
328317
; CHECK-COMMON-NEXT: sturh w8, [x29, #-8]
329-
; CHECK-COMMON-NEXT: msr TPIDR2_EL0, x10
318+
; CHECK-COMMON-NEXT: msr TPIDR2_EL0, x9
330319
; CHECK-COMMON-NEXT: bl __addtf3
331320
; CHECK-COMMON-NEXT: smstart za
332321
; CHECK-COMMON-NEXT: mrs x8, TPIDR2_EL0
@@ -386,12 +375,12 @@ define double @frem_call_za(double %a, double %b) "aarch64_inout_za" nounwind {
386375
; CHECK-COMMON-NEXT: mov x9, sp
387376
; CHECK-COMMON-NEXT: msub x9, x8, x8, x9
388377
; CHECK-COMMON-NEXT: mov sp, x9
389-
; CHECK-COMMON-NEXT: sub x10, x29, #16
390-
; CHECK-COMMON-NEXT: stur wzr, [x29, #-4]
391-
; CHECK-COMMON-NEXT: sturh wzr, [x29, #-6]
392378
; CHECK-COMMON-NEXT: stur x9, [x29, #-16]
379+
; CHECK-COMMON-NEXT: sub x9, x29, #16
380+
; CHECK-COMMON-NEXT: sturh wzr, [x29, #-6]
381+
; CHECK-COMMON-NEXT: stur wzr, [x29, #-4]
393382
; CHECK-COMMON-NEXT: sturh w8, [x29, #-8]
394-
; CHECK-COMMON-NEXT: msr TPIDR2_EL0, x10
383+
; CHECK-COMMON-NEXT: msr TPIDR2_EL0, x9
395384
; CHECK-COMMON-NEXT: bl fmod
396385
; CHECK-COMMON-NEXT: smstart za
397386
; CHECK-COMMON-NEXT: mrs x8, TPIDR2_EL0

llvm/test/CodeGen/AArch64/sme-framelower-use-bp.ll

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ define void @quux() #1 {
1818
; CHECK-NEXT: stp x22, x21, [sp, #64] // 16-byte Folded Spill
1919
; CHECK-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill
2020
; CHECK-NEXT: mov x29, sp
21-
; CHECK-NEXT: sub sp, sp, #400
21+
; CHECK-NEXT: sub sp, sp, #384
2222
; CHECK-NEXT: addvl sp, sp, #-1
2323
; CHECK-NEXT: mov x19, sp
2424
; CHECK-NEXT: .cfi_def_cfa w29, 96
@@ -35,14 +35,6 @@ define void @quux() #1 {
3535
; CHECK-NEXT: .cfi_offset w30, -88
3636
; CHECK-NEXT: .cfi_offset w29, -96
3737
; CHECK-NEXT: rdsvl x8, #1
38-
; CHECK-NEXT: mul x9, x8, x8
39-
; CHECK-NEXT: mov x8, sp
40-
; CHECK-NEXT: subs x8, x8, x9
41-
; CHECK-NEXT: mov sp, x8
42-
; CHECK-NEXT: str x8, [x19, #384]
43-
; CHECK-NEXT: mov w8, wzr
44-
; CHECK-NEXT: strh w8, [x19, #394]
45-
; CHECK-NEXT: str w8, [x19, #396]
4638
; CHECK-NEXT: mrs x8, TPIDR2_EL0
4739
; CHECK-NEXT: cbz x8, .LBB0_2
4840
; CHECK-NEXT: b .LBB0_1

0 commit comments

Comments
 (0)