Skip to content

[AArch64][SME] Create new pass to remove COALESCER_BARRIER early. #85386

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Apr 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions llvm/lib/Target/AArch64/AArch64.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ FunctionPass *createFalkorMarkStridedAccessesPass();
FunctionPass *createAArch64PointerAuthPass();
FunctionPass *createAArch64BranchTargetsPass();
FunctionPass *createAArch64MIPeepholeOptPass();
FunctionPass *createAArch64PostCoalescerPass();

FunctionPass *createAArch64CleanupLocalDynamicTLSPass();

Expand Down Expand Up @@ -93,6 +94,7 @@ void initializeAArch64LoopIdiomTransformLegacyPassPass(PassRegistry &);
void initializeAArch64LowerHomogeneousPrologEpilogPass(PassRegistry &);
void initializeAArch64MIPeepholeOptPass(PassRegistry &);
void initializeAArch64O0PreLegalizerCombinerPass(PassRegistry &);
void initializeAArch64PostCoalescerPass(PassRegistry &);
void initializeAArch64PostLegalizerCombinerPass(PassRegistry &);
void initializeAArch64PostLegalizerLoweringPass(PassRegistry &);
void initializeAArch64PostSelectOptimizePass(PassRegistry &);
Expand Down
101 changes: 101 additions & 0 deletions llvm/lib/Target/AArch64/AArch64PostCoalescerPass.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
//===- AArch64PostCoalescerPass.cpp - AArch64 Post Coalescer pass ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//

#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/InitializePasses.h"

using namespace llvm;

#define DEBUG_TYPE "aarch64-post-coalescer-pass"

namespace {

struct AArch64PostCoalescer : public MachineFunctionPass {
static char ID;

AArch64PostCoalescer() : MachineFunctionPass(ID) {
initializeAArch64PostCoalescerPass(*PassRegistry::getPassRegistry());
}

LiveIntervals *LIS;
MachineRegisterInfo *MRI;

bool runOnMachineFunction(MachineFunction &MF) override;

StringRef getPassName() const override {
return "AArch64 Post Coalescer pass";
}

void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesAll();
AU.addRequired<LiveIntervals>();
MachineFunctionPass::getAnalysisUsage(AU);
}
};

char AArch64PostCoalescer::ID = 0;

} // end anonymous namespace

INITIALIZE_PASS_BEGIN(AArch64PostCoalescer, "aarch64-post-coalescer-pass",
"AArch64 Post Coalescer Pass", false, false)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
INITIALIZE_PASS_END(AArch64PostCoalescer, "aarch64-post-coalescer-pass",
"AArch64 Post Coalescer Pass", false, false)

bool AArch64PostCoalescer::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;

AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
if (!FuncInfo->hasStreamingModeChanges())
return false;

MRI = &MF.getRegInfo();
LIS = &getAnalysis<LiveIntervals>();
bool Changed = false;

for (MachineBasicBlock &MBB : MF) {
for (MachineInstr &MI : make_early_inc_range(MBB)) {
switch (MI.getOpcode()) {
default:
break;
case AArch64::COALESCER_BARRIER_FPR16:
case AArch64::COALESCER_BARRIER_FPR32:
case AArch64::COALESCER_BARRIER_FPR64:
case AArch64::COALESCER_BARRIER_FPR128: {
Register Src = MI.getOperand(1).getReg();
Register Dst = MI.getOperand(0).getReg();
if (Src != Dst)
MRI->replaceRegWith(Dst, Src);

// MI must be erased from the basic block before recalculating the live
// interval.
LIS->RemoveMachineInstrFromMaps(MI);
MI.eraseFromParent();

LIS->removeInterval(Src);
LIS->createAndComputeVirtRegInterval(Src);

Changed = true;
break;
}
}
}
}

return Changed;
}

FunctionPass *llvm::createAArch64PostCoalescerPass() {
return new AArch64PostCoalescer();
}
7 changes: 7 additions & 0 deletions llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAArch64Target() {
initializeAArch64O0PreLegalizerCombinerPass(*PR);
initializeAArch64PreLegalizerCombinerPass(*PR);
initializeAArch64PointerAuthPass(*PR);
initializeAArch64PostCoalescerPass(*PR);
initializeAArch64PostLegalizerCombinerPass(*PR);
initializeAArch64PostLegalizerLoweringPass(*PR);
initializeAArch64PostSelectOptimizePass(*PR);
Expand Down Expand Up @@ -539,6 +540,7 @@ class AArch64PassConfig : public TargetPassConfig {
void addPreEmitPass() override;
void addPostBBSections() override;
void addPreEmitPass2() override;
bool addRegAssignAndRewriteOptimized() override;

std::unique_ptr<CSEConfigBase> getCSEConfig() const override;
};
Expand Down Expand Up @@ -876,6 +878,11 @@ void AArch64PassConfig::addPreEmitPass2() {
addPass(createUnpackMachineBundles(nullptr));
}

bool AArch64PassConfig::addRegAssignAndRewriteOptimized() {
addPass(createAArch64PostCoalescerPass());
return TargetPassConfig::addRegAssignAndRewriteOptimized();
}

MachineFunctionInfo *AArch64TargetMachine::createMachineFunctionInfo(
BumpPtrAllocator &Allocator, const Function &F,
const TargetSubtargetInfo *STI) const {
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AArch64/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ add_llvm_target(AArch64CodeGen
AArch64MIPeepholeOpt.cpp
AArch64MCInstLower.cpp
AArch64PointerAuth.cpp
AArch64PostCoalescerPass.cpp
AArch64PromoteConstant.cpp
AArch64PBQPRegAlloc.cpp
AArch64RegisterInfo.cpp
Expand Down
1 change: 1 addition & 0 deletions llvm/test/CodeGen/AArch64/O3-pipeline.ll
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,7 @@
; CHECK-NEXT: Register Coalescer
; CHECK-NEXT: Rename Disconnected Subregister Components
; CHECK-NEXT: Machine Instruction Scheduler
; CHECK-NEXT: AArch64 Post Coalescer pass
; CHECK-NEXT: Machine Block Frequency Analysis
; CHECK-NEXT: Debug Variable Analysis
; CHECK-NEXT: Live Stack Slot Analysis
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,7 @@ define void @dont_coalesce_args(<2 x i64> %a) "aarch64_pstate_sm_body" nounwind
; CHECK-REGALLOC: bb.0 (%ir-block.0):
; CHECK-REGALLOC-NEXT: liveins: $q0
; CHECK-REGALLOC-NEXT: {{ $}}
; CHECK-REGALLOC-NEXT: renamable $q0 = COALESCER_BARRIER_FPR128 killed renamable $q0
; CHECK-REGALLOC-NEXT: STRQui killed renamable $q0, %stack.0, 0 :: (store (s128) into %stack.0)
; CHECK-REGALLOC-NEXT: STRQui $q0, %stack.0, 0 :: (store (s128) into %stack.0)
; CHECK-REGALLOC-NEXT: MSRpstatesvcrImm1 1, 1, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit $vg, implicit-def $vg
; CHECK-REGALLOC-NEXT: renamable $q0 = LDRQui %stack.0, 0 :: (load (s128) from %stack.0)
; CHECK-REGALLOC-NEXT: renamable $q0 = KILL killed renamable $q0, implicit-def $z0
Expand Down Expand Up @@ -61,7 +60,6 @@ define <2 x i64> @dont_coalesce_res() "aarch64_pstate_sm_body" nounwind {
; CHECK-REGALLOC-NEXT: BL @scalable_res, csr_aarch64_sve_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $z0
; CHECK-REGALLOC-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
; CHECK-REGALLOC-NEXT: renamable $q0 = KILL renamable $q0, implicit killed $z0
; CHECK-REGALLOC-NEXT: renamable $q0 = COALESCER_BARRIER_FPR128 killed renamable $q0
; CHECK-REGALLOC-NEXT: STRQui killed renamable $q0, %stack.0, 0 :: (store (s128) into %stack.0)
; CHECK-REGALLOC-NEXT: MSRpstatesvcrImm1 1, 0, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit-def dead $q0, implicit $vg, implicit-def $vg
; CHECK-REGALLOC-NEXT: $q0 = LDRQui %stack.0, 0 :: (load (s128) from %stack.0)
Expand Down Expand Up @@ -94,17 +92,13 @@ define <2 x i64> @dont_coalesce_arg_that_is_also_res(<2 x i64> %a) "aarch64_psta
; CHECK-REGALLOC: bb.0 (%ir-block.0):
; CHECK-REGALLOC-NEXT: liveins: $q0
; CHECK-REGALLOC-NEXT: {{ $}}
; CHECK-REGALLOC-NEXT: renamable $q0 = COALESCER_BARRIER_FPR128 killed renamable $q0
; CHECK-REGALLOC-NEXT: STRQui killed renamable $q0, %stack.0, 0 :: (store (s128) into %stack.0)
; CHECK-REGALLOC-NEXT: STRQui $q0, %stack.0, 0 :: (store (s128) into %stack.0)
; CHECK-REGALLOC-NEXT: MSRpstatesvcrImm1 1, 1, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit $vg, implicit-def $vg
; CHECK-REGALLOC-NEXT: renamable $q0 = LDRQui %stack.0, 0 :: (load (s128) from %stack.0)
; CHECK-REGALLOC-NEXT: renamable $q0 = KILL killed renamable $q0, implicit-def $z0
; CHECK-REGALLOC-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
; CHECK-REGALLOC-NEXT: BL @scalable_args, csr_aarch64_sve_aapcs, implicit-def dead $lr, implicit $sp, implicit $z0, implicit-def $sp
; CHECK-REGALLOC-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
; CHECK-REGALLOC-NEXT: renamable $q0 = LDRQui %stack.0, 0 :: (load (s128) from %stack.0)
; CHECK-REGALLOC-NEXT: renamable $q0 = COALESCER_BARRIER_FPR128 killed renamable $q0
; CHECK-REGALLOC-NEXT: STRQui killed renamable $q0, %stack.0, 0 :: (store (s128) into %stack.0)
; CHECK-REGALLOC-NEXT: MSRpstatesvcrImm1 1, 0, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit-def dead $q0, implicit $vg, implicit-def $vg
; CHECK-REGALLOC-NEXT: $q0 = LDRQui %stack.0, 0 :: (load (s128) from %stack.0)
; CHECK-REGALLOC-NEXT: RET_ReallyLR implicit $q0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,7 @@ define void @streaming_compatible_arg(float %f) #0 {
; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT: str s0, [sp, #12] // 4-byte Folded Spill
; CHECK-NEXT: bl __arm_sme_state
; CHECK-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload
; CHECK-NEXT: and x19, x0, #0x1
; CHECK-NEXT: str s0, [sp, #12] // 4-byte Folded Spill
; CHECK-NEXT: tbz w19, #0, .LBB1_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstop sm
Expand Down
10 changes: 4 additions & 6 deletions llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll
Original file line number Diff line number Diff line change
Expand Up @@ -331,9 +331,9 @@ define fp128 @f128_call_sm(fp128 %a, fp128 %b) "aarch64_pstate_sm_enabled" nounw
; CHECK-COMMON-NEXT: stp d11, d10, [sp, #64] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: stp d9, d8, [sp, #80] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: str x30, [sp, #96] // 8-byte Folded Spill
; CHECK-COMMON-NEXT: stp q1, q0, [sp] // 32-byte Folded Spill
; CHECK-COMMON-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill
; CHECK-COMMON-NEXT: smstop sm
; CHECK-COMMON-NEXT: ldp q1, q0, [sp] // 32-byte Folded Reload
; CHECK-COMMON-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload
; CHECK-COMMON-NEXT: bl __addtf3
; CHECK-COMMON-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: smstart sm
Expand Down Expand Up @@ -392,9 +392,9 @@ define float @frem_call_sm(float %a, float %b) "aarch64_pstate_sm_enabled" nounw
; CHECK-COMMON-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
; CHECK-COMMON-NEXT: stp s1, s0, [sp, #8] // 8-byte Folded Spill
; CHECK-COMMON-NEXT: stp s0, s1, [sp, #8] // 8-byte Folded Spill
; CHECK-COMMON-NEXT: smstop sm
; CHECK-COMMON-NEXT: ldp s1, s0, [sp, #8] // 8-byte Folded Reload
; CHECK-COMMON-NEXT: ldp s0, s1, [sp, #8] // 8-byte Folded Reload
; CHECK-COMMON-NEXT: bl fmodf
; CHECK-COMMON-NEXT: str s0, [sp, #12] // 4-byte Folded Spill
; CHECK-COMMON-NEXT: smstart sm
Expand Down Expand Up @@ -422,9 +422,7 @@ define float @frem_call_sm_compat(float %a, float %b) "aarch64_pstate_sm_compati
; CHECK-COMMON-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: stp s0, s1, [sp, #8] // 8-byte Folded Spill
; CHECK-COMMON-NEXT: bl __arm_sme_state
; CHECK-COMMON-NEXT: ldp s2, s0, [sp, #8] // 8-byte Folded Reload
; CHECK-COMMON-NEXT: and x19, x0, #0x1
; CHECK-COMMON-NEXT: stp s2, s0, [sp, #8] // 8-byte Folded Spill
; CHECK-COMMON-NEXT: tbz w19, #0, .LBB12_2
; CHECK-COMMON-NEXT: // %bb.1:
; CHECK-COMMON-NEXT: smstop sm
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1085,7 +1085,6 @@ define void @dont_coalesce_res_f32(ptr %ptr) #0 {
; CHECK-NEXT: smstart sm
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload
; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
Expand Down Expand Up @@ -1116,7 +1115,6 @@ define void @dont_coalesce_res_f64(ptr %ptr) #0 {
; CHECK-NEXT: smstart sm
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
Expand Down Expand Up @@ -1151,7 +1149,6 @@ define void @dont_coalesce_res_v1i8(ptr %ptr) #0 {
; CHECK-NEXT: smstart sm
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
Expand Down Expand Up @@ -1183,7 +1180,6 @@ define void @dont_coalesce_res_v1i16(ptr %ptr) #0 {
; CHECK-NEXT: smstart sm
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
Expand Down Expand Up @@ -1215,7 +1211,6 @@ define void @dont_coalesce_res_v1i32(ptr %ptr) #0 {
; CHECK-NEXT: smstart sm
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
Expand Down Expand Up @@ -1247,7 +1242,6 @@ define void @dont_coalesce_res_v1i64(ptr %ptr) #0 {
; CHECK-NEXT: smstart sm
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
Expand Down Expand Up @@ -1311,7 +1305,6 @@ define void @dont_coalesce_res_v1f32(ptr %ptr) #0 {
; CHECK-NEXT: smstart sm
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
Expand Down Expand Up @@ -1343,7 +1336,6 @@ define void @dont_coalesce_res_v1f64(ptr %ptr) #0 {
; CHECK-NEXT: smstart sm
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,31 +8,27 @@ declare void @streaming_compatible_callee() "aarch64_pstate_sm_compatible";
define float @sm_body_sm_compatible_simple() "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" nounwind {
; CHECK-LABEL: sm_body_sm_compatible_simple:
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #96
; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
; CHECK-NEXT: bl __arm_sme_state
; CHECK-NEXT: and x8, x0, #0x1
; CHECK-NEXT: tbnz w8, #0, .LBB0_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstart sm
; CHECK-NEXT: .LBB0_2:
; CHECK-NEXT: fmov s0, wzr
; CHECK-NEXT: str s0, [sp, #12] // 4-byte Folded Spill
; CHECK-NEXT: tbnz w8, #0, .LBB0_4
; CHECK-NEXT: // %bb.3:
; CHECK-NEXT: smstop sm
; CHECK-NEXT: .LBB0_4:
; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: add sp, sp, #96
; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: fmov s0, wzr
; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
; CHECK-NEXT: ret
ret float zeroinitializer
}
Expand Down
4 changes: 0 additions & 4 deletions llvm/test/CodeGen/AArch64/sme-streaming-body.ll
Original file line number Diff line number Diff line change
Expand Up @@ -247,15 +247,11 @@ define double @call_to_intrinsic_without_chain(double %x) nounwind "aarch64_psta
; CHECK-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
; CHECK-NEXT: smstart sm
; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
; CHECK-NEXT: smstop sm
; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
; CHECK-NEXT: bl cos
; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
; CHECK-NEXT: smstart sm
; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
; CHECK-NEXT: smstop sm
; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -469,11 +469,7 @@ define void @call_to_non_streaming_pass_args(ptr nocapture noundef readnone %ptr
; CHECK-NEXT: mov x9, x0
; CHECK-NEXT: stp s0, s1, [sp, #8] // 8-byte Folded Spill
; CHECK-NEXT: bl __arm_sme_state
; CHECK-NEXT: ldp s4, s0, [sp, #8] // 8-byte Folded Reload
; CHECK-NEXT: and x19, x0, #0x1
; CHECK-NEXT: stp s4, s0, [sp, #8] // 8-byte Folded Spill
; CHECK-NEXT: ldp d4, d0, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: stp d4, d0, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: tbz w19, #0, .LBB10_2
; CHECK-NEXT: // %bb.1: // %entry
; CHECK-NEXT: smstop sm
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AArch64/sme-streaming-interface.ll
Original file line number Diff line number Diff line change
Expand Up @@ -405,11 +405,11 @@ define void @call_to_non_streaming_pass_args(ptr nocapture noundef readnone %ptr
; CHECK-NEXT: stp d11, d10, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT: str x30, [sp, #96] // 8-byte Folded Spill
; CHECK-NEXT: stp s1, s0, [sp, #24] // 8-byte Folded Spill
; CHECK-NEXT: stp d3, d2, [sp, #8] // 16-byte Folded Spill
; CHECK-NEXT: stp d2, d3, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp s0, s1, [sp, #8] // 8-byte Folded Spill
; CHECK-NEXT: smstop sm
; CHECK-NEXT: ldp s1, s0, [sp, #24] // 8-byte Folded Reload
; CHECK-NEXT: ldp d3, d2, [sp, #8] // 16-byte Folded Reload
; CHECK-NEXT: ldp s0, s1, [sp, #8] // 8-byte Folded Reload
; CHECK-NEXT: ldp d2, d3, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: bl bar
; CHECK-NEXT: smstart sm
; CHECK-NEXT: ldp d9, d8, [sp, #80] // 16-byte Folded Reload
Expand Down
Loading