Skip to content

Commit c3d5886

Browse files
[AArch64][SME] Create new pass to remove COALESCER_BARRIER early. (#85386)
The purpose of the COALESCER_BARRIER pseudo node is to prevent the register coalescer from coalescing certain COPY instructions around smstart/smstop instructions, so that we spill only the (required) FPR register rather than the encompassing ZPR register. The pseudos are removed in the AArch64ExpandPseudo pass. However, because the node itself is a _use_ of a register, this occasionally leads to redundant spills/fills, because the register allocator thinks the virtual register is actually used before an smstart/smstop instruction, causing it to be filled, at which point it requires immediate spilling again to ensure it stays live over the smstart/smstop instruction. We can avoid that by removing the pseudo nodes right after coalescing, but before register allocation.
1 parent 32cb3c5 commit c3d5886

14 files changed

+134
-52
lines changed

llvm/lib/Target/AArch64/AArch64.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ FunctionPass *createFalkorMarkStridedAccessesPass();
5454
FunctionPass *createAArch64PointerAuthPass();
5555
FunctionPass *createAArch64BranchTargetsPass();
5656
FunctionPass *createAArch64MIPeepholeOptPass();
57+
FunctionPass *createAArch64PostCoalescerPass();
5758

5859
FunctionPass *createAArch64CleanupLocalDynamicTLSPass();
5960

@@ -93,6 +94,7 @@ void initializeAArch64LoopIdiomTransformLegacyPassPass(PassRegistry &);
9394
void initializeAArch64LowerHomogeneousPrologEpilogPass(PassRegistry &);
9495
void initializeAArch64MIPeepholeOptPass(PassRegistry &);
9596
void initializeAArch64O0PreLegalizerCombinerPass(PassRegistry &);
97+
void initializeAArch64PostCoalescerPass(PassRegistry &);
9698
void initializeAArch64PostLegalizerCombinerPass(PassRegistry &);
9799
void initializeAArch64PostLegalizerLoweringPass(PassRegistry &);
98100
void initializeAArch64PostSelectOptimizePass(PassRegistry &);
Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
//===- AArch64PostCoalescerPass.cpp - AArch64 Post Coalescer pass ---------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//===----------------------------------------------------------------------===//
9+
10+
#include "AArch64InstrInfo.h"
11+
#include "AArch64MachineFunctionInfo.h"
12+
#include "llvm/CodeGen/LiveIntervals.h"
13+
#include "llvm/CodeGen/MachineRegisterInfo.h"
14+
#include "llvm/InitializePasses.h"
15+
16+
using namespace llvm;
17+
18+
#define DEBUG_TYPE "aarch64-post-coalescer-pass"
19+
20+
namespace {
21+
22+
/// Machine-function pass that strips the COALESCER_BARRIER_FPR* pseudo
/// instructions from functions with streaming-mode changes. Per the commit
/// description it is meant to run after register coalescing but before
/// register allocation, so the pseudos no longer count as register uses.
struct AArch64PostCoalescer : public MachineFunctionPass {
23+
// Pass identification; the definition below initialises it to 0.
static char ID;
24+
25+
// Registers this pass with the global PassRegistry on construction.
AArch64PostCoalescer() : MachineFunctionPass(ID) {
26+
initializeAArch64PostCoalescerPass(*PassRegistry::getPassRegistry());
27+
}
28+
29+
// Both pointers are assigned inside runOnMachineFunction before use; they
// are left uninitialised by the constructor.
LiveIntervals *LIS;
30+
MachineRegisterInfo *MRI;
31+
32+
// Removes the barrier pseudos; returns true if any instruction was erased.
bool runOnMachineFunction(MachineFunction &MF) override;
33+
34+
// Human-readable pass name (matched by the O3-pipeline.ll test).
StringRef getPassName() const override {
35+
return "AArch64 Post Coalescer pass";
36+
}
37+
38+
// Requires LiveIntervals and declares every analysis preserved.
// NOTE(review): the pass erases instructions and rewrites registers while
// claiming setPreservesAll(); LiveIntervals is patched by hand in
// runOnMachineFunction -- confirm no other cached analysis is invalidated.
void getAnalysisUsage(AnalysisUsage &AU) const override {
39+
AU.setPreservesAll();
40+
AU.addRequired<LiveIntervals>();
41+
MachineFunctionPass::getAnalysisUsage(AU);
42+
}
43+
};
44+
45+
char AArch64PostCoalescer::ID = 0;
46+
47+
} // end anonymous namespace
48+
49+
INITIALIZE_PASS_BEGIN(AArch64PostCoalescer, "aarch64-post-coalescer-pass",
50+
"AArch64 Post Coalescer Pass", false, false)
51+
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
52+
INITIALIZE_PASS_END(AArch64PostCoalescer, "aarch64-post-coalescer-pass",
53+
"AArch64 Post Coalescer Pass", false, false)
54+
55+
/// Erases every COALESCER_BARRIER_FPR{16,32,64,128} pseudo in \p MF,
/// forwarding the barrier's source register to all references of its
/// destination and refreshing the source's live interval.
/// \returns true if at least one barrier was removed, false otherwise.
bool AArch64PostCoalescer::runOnMachineFunction(MachineFunction &MF) {
56+
// Respect the generic skipFunction() check before doing any work.
if (skipFunction(MF.getFunction()))
57+
return false;
58+
59+
// Nothing to do for functions without streaming-mode changes; presumably
// no barrier pseudos are emitted for them -- TODO confirm.
AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
60+
if (!FuncInfo->hasStreamingModeChanges())
61+
return false;
62+
63+
MRI = &MF.getRegInfo();
64+
LIS = &getAnalysis<LiveIntervals>();
65+
bool Changed = false;
66+
67+
for (MachineBasicBlock &MBB : MF) {
68+
// Early-increment iteration because MI may be erased inside the loop body.
for (MachineInstr &MI : make_early_inc_range(MBB)) {
69+
switch (MI.getOpcode()) {
70+
default:
71+
break;
72+
case AArch64::COALESCER_BARRIER_FPR16:
73+
case AArch64::COALESCER_BARRIER_FPR32:
74+
case AArch64::COALESCER_BARRIER_FPR64:
75+
case AArch64::COALESCER_BARRIER_FPR128: {
76+
// Operand 0 is the barrier's result, operand 1 its input.
Register Src = MI.getOperand(1).getReg();
77+
Register Dst = MI.getOperand(0).getReg();
78+
// Make every reference to the result refer to the input instead; skipped
// when both are already the same register.
if (Src != Dst)
79+
MRI->replaceRegWith(Dst, Src);
80+
81+
// MI must be erased from the basic block before recalculating the live
82+
// interval.
83+
LIS->RemoveMachineInstrFromMaps(MI);
84+
MI.eraseFromParent();
85+
86+
// Drop and recompute Src's interval now that the barrier is gone.
// NOTE(review): only Src's interval is refreshed and Src is treated as a
// virtual register; Dst's interval is not touched here -- confirm intended.
LIS->removeInterval(Src);
87+
LIS->createAndComputeVirtRegInterval(Src);
88+
89+
Changed = true;
90+
break;
91+
}
92+
}
93+
}
94+
}
95+
96+
return Changed;
97+
}
98+
99+
/// Factory for the pass: heap-allocates a new AArch64PostCoalescer and
/// returns it to the caller as a FunctionPass pointer.
FunctionPass *llvm::createAArch64PostCoalescerPass() {
100+
return new AArch64PostCoalescer();
101+
}

llvm/lib/Target/AArch64/AArch64TargetMachine.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAArch64Target() {
235235
initializeAArch64O0PreLegalizerCombinerPass(*PR);
236236
initializeAArch64PreLegalizerCombinerPass(*PR);
237237
initializeAArch64PointerAuthPass(*PR);
238+
initializeAArch64PostCoalescerPass(*PR);
238239
initializeAArch64PostLegalizerCombinerPass(*PR);
239240
initializeAArch64PostLegalizerLoweringPass(*PR);
240241
initializeAArch64PostSelectOptimizePass(*PR);
@@ -539,6 +540,7 @@ class AArch64PassConfig : public TargetPassConfig {
539540
void addPreEmitPass() override;
540541
void addPostBBSections() override;
541542
void addPreEmitPass2() override;
543+
bool addRegAssignAndRewriteOptimized() override;
542544

543545
std::unique_ptr<CSEConfigBase> getCSEConfig() const override;
544546
};
@@ -876,6 +878,11 @@ void AArch64PassConfig::addPreEmitPass2() {
876878
addPass(createUnpackMachineBundles(nullptr));
877879
}
878880

881+
/// Schedules the AArch64 post-coalescer pass and then defers to the base
/// TargetPassConfig implementation, returning whatever it returns. In the
/// O3 pipeline test this places the pass right after
/// "Machine Instruction Scheduler".
bool AArch64PassConfig::addRegAssignAndRewriteOptimized() {
882+
addPass(createAArch64PostCoalescerPass());
883+
return TargetPassConfig::addRegAssignAndRewriteOptimized();
884+
}
885+
879886
MachineFunctionInfo *AArch64TargetMachine::createMachineFunctionInfo(
880887
BumpPtrAllocator &Allocator, const Function &F,
881888
const TargetSubtargetInfo *STI) const {

llvm/lib/Target/AArch64/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ add_llvm_target(AArch64CodeGen
7373
AArch64MIPeepholeOpt.cpp
7474
AArch64MCInstLower.cpp
7575
AArch64PointerAuth.cpp
76+
AArch64PostCoalescerPass.cpp
7677
AArch64PromoteConstant.cpp
7778
AArch64PBQPRegAlloc.cpp
7879
AArch64RegisterInfo.cpp

llvm/test/CodeGen/AArch64/O3-pipeline.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,7 @@
167167
; CHECK-NEXT: Register Coalescer
168168
; CHECK-NEXT: Rename Disconnected Subregister Components
169169
; CHECK-NEXT: Machine Instruction Scheduler
170+
; CHECK-NEXT: AArch64 Post Coalescer pass
170171
; CHECK-NEXT: Machine Block Frequency Analysis
171172
; CHECK-NEXT: Debug Variable Analysis
172173
; CHECK-NEXT: Live Stack Slot Analysis

llvm/test/CodeGen/AArch64/sme-avoid-coalescing-locally-streaming.ll

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,7 @@ define void @dont_coalesce_args(<2 x i64> %a) "aarch64_pstate_sm_body" nounwind
2525
; CHECK-REGALLOC: bb.0 (%ir-block.0):
2626
; CHECK-REGALLOC-NEXT: liveins: $q0
2727
; CHECK-REGALLOC-NEXT: {{ $}}
28-
; CHECK-REGALLOC-NEXT: renamable $q0 = COALESCER_BARRIER_FPR128 killed renamable $q0
29-
; CHECK-REGALLOC-NEXT: STRQui killed renamable $q0, %stack.0, 0 :: (store (s128) into %stack.0)
28+
; CHECK-REGALLOC-NEXT: STRQui $q0, %stack.0, 0 :: (store (s128) into %stack.0)
3029
; CHECK-REGALLOC-NEXT: MSRpstatesvcrImm1 1, 1, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit $vg, implicit-def $vg
3130
; CHECK-REGALLOC-NEXT: renamable $q0 = LDRQui %stack.0, 0 :: (load (s128) from %stack.0)
3231
; CHECK-REGALLOC-NEXT: renamable $q0 = KILL killed renamable $q0, implicit-def $z0
@@ -61,7 +60,6 @@ define <2 x i64> @dont_coalesce_res() "aarch64_pstate_sm_body" nounwind {
6160
; CHECK-REGALLOC-NEXT: BL @scalable_res, csr_aarch64_sve_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $z0
6261
; CHECK-REGALLOC-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
6362
; CHECK-REGALLOC-NEXT: renamable $q0 = KILL renamable $q0, implicit killed $z0
64-
; CHECK-REGALLOC-NEXT: renamable $q0 = COALESCER_BARRIER_FPR128 killed renamable $q0
6563
; CHECK-REGALLOC-NEXT: STRQui killed renamable $q0, %stack.0, 0 :: (store (s128) into %stack.0)
6664
; CHECK-REGALLOC-NEXT: MSRpstatesvcrImm1 1, 0, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit-def dead $q0, implicit $vg, implicit-def $vg
6765
; CHECK-REGALLOC-NEXT: $q0 = LDRQui %stack.0, 0 :: (load (s128) from %stack.0)
@@ -94,17 +92,13 @@ define <2 x i64> @dont_coalesce_arg_that_is_also_res(<2 x i64> %a) "aarch64_psta
9492
; CHECK-REGALLOC: bb.0 (%ir-block.0):
9593
; CHECK-REGALLOC-NEXT: liveins: $q0
9694
; CHECK-REGALLOC-NEXT: {{ $}}
97-
; CHECK-REGALLOC-NEXT: renamable $q0 = COALESCER_BARRIER_FPR128 killed renamable $q0
98-
; CHECK-REGALLOC-NEXT: STRQui killed renamable $q0, %stack.0, 0 :: (store (s128) into %stack.0)
95+
; CHECK-REGALLOC-NEXT: STRQui $q0, %stack.0, 0 :: (store (s128) into %stack.0)
9996
; CHECK-REGALLOC-NEXT: MSRpstatesvcrImm1 1, 1, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit $vg, implicit-def $vg
10097
; CHECK-REGALLOC-NEXT: renamable $q0 = LDRQui %stack.0, 0 :: (load (s128) from %stack.0)
10198
; CHECK-REGALLOC-NEXT: renamable $q0 = KILL killed renamable $q0, implicit-def $z0
10299
; CHECK-REGALLOC-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
103100
; CHECK-REGALLOC-NEXT: BL @scalable_args, csr_aarch64_sve_aapcs, implicit-def dead $lr, implicit $sp, implicit $z0, implicit-def $sp
104101
; CHECK-REGALLOC-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
105-
; CHECK-REGALLOC-NEXT: renamable $q0 = LDRQui %stack.0, 0 :: (load (s128) from %stack.0)
106-
; CHECK-REGALLOC-NEXT: renamable $q0 = COALESCER_BARRIER_FPR128 killed renamable $q0
107-
; CHECK-REGALLOC-NEXT: STRQui killed renamable $q0, %stack.0, 0 :: (store (s128) into %stack.0)
108102
; CHECK-REGALLOC-NEXT: MSRpstatesvcrImm1 1, 0, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit-def dead $q0, implicit $vg, implicit-def $vg
109103
; CHECK-REGALLOC-NEXT: $q0 = LDRQui %stack.0, 0 :: (load (s128) from %stack.0)
110104
; CHECK-REGALLOC-NEXT: RET_ReallyLR implicit $q0

llvm/test/CodeGen/AArch64/sme-call-streaming-compatible-to-normal-fn-wihout-sme-attr.ll

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,9 +52,7 @@ define void @streaming_compatible_arg(float %f) #0 {
5252
; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
5353
; CHECK-NEXT: str s0, [sp, #12] // 4-byte Folded Spill
5454
; CHECK-NEXT: bl __arm_sme_state
55-
; CHECK-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload
5655
; CHECK-NEXT: and x19, x0, #0x1
57-
; CHECK-NEXT: str s0, [sp, #12] // 4-byte Folded Spill
5856
; CHECK-NEXT: tbz w19, #0, .LBB1_2
5957
; CHECK-NEXT: // %bb.1:
6058
; CHECK-NEXT: smstop sm

llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -331,9 +331,9 @@ define fp128 @f128_call_sm(fp128 %a, fp128 %b) "aarch64_pstate_sm_enabled" nounw
331331
; CHECK-COMMON-NEXT: stp d11, d10, [sp, #64] // 16-byte Folded Spill
332332
; CHECK-COMMON-NEXT: stp d9, d8, [sp, #80] // 16-byte Folded Spill
333333
; CHECK-COMMON-NEXT: str x30, [sp, #96] // 8-byte Folded Spill
334-
; CHECK-COMMON-NEXT: stp q1, q0, [sp] // 32-byte Folded Spill
334+
; CHECK-COMMON-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill
335335
; CHECK-COMMON-NEXT: smstop sm
336-
; CHECK-COMMON-NEXT: ldp q1, q0, [sp] // 32-byte Folded Reload
336+
; CHECK-COMMON-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload
337337
; CHECK-COMMON-NEXT: bl __addtf3
338338
; CHECK-COMMON-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
339339
; CHECK-COMMON-NEXT: smstart sm
@@ -392,9 +392,9 @@ define float @frem_call_sm(float %a, float %b) "aarch64_pstate_sm_enabled" nounw
392392
; CHECK-COMMON-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
393393
; CHECK-COMMON-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
394394
; CHECK-COMMON-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
395-
; CHECK-COMMON-NEXT: stp s1, s0, [sp, #8] // 8-byte Folded Spill
395+
; CHECK-COMMON-NEXT: stp s0, s1, [sp, #8] // 8-byte Folded Spill
396396
; CHECK-COMMON-NEXT: smstop sm
397-
; CHECK-COMMON-NEXT: ldp s1, s0, [sp, #8] // 8-byte Folded Reload
397+
; CHECK-COMMON-NEXT: ldp s0, s1, [sp, #8] // 8-byte Folded Reload
398398
; CHECK-COMMON-NEXT: bl fmodf
399399
; CHECK-COMMON-NEXT: str s0, [sp, #12] // 4-byte Folded Spill
400400
; CHECK-COMMON-NEXT: smstart sm
@@ -422,9 +422,7 @@ define float @frem_call_sm_compat(float %a, float %b) "aarch64_pstate_sm_compati
422422
; CHECK-COMMON-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
423423
; CHECK-COMMON-NEXT: stp s0, s1, [sp, #8] // 8-byte Folded Spill
424424
; CHECK-COMMON-NEXT: bl __arm_sme_state
425-
; CHECK-COMMON-NEXT: ldp s2, s0, [sp, #8] // 8-byte Folded Reload
426425
; CHECK-COMMON-NEXT: and x19, x0, #0x1
427-
; CHECK-COMMON-NEXT: stp s2, s0, [sp, #8] // 8-byte Folded Spill
428426
; CHECK-COMMON-NEXT: tbz w19, #0, .LBB12_2
429427
; CHECK-COMMON-NEXT: // %bb.1:
430428
; CHECK-COMMON-NEXT: smstop sm

llvm/test/CodeGen/AArch64/sme-pstate-sm-changing-call-disable-coalescing.ll

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1085,7 +1085,6 @@ define void @dont_coalesce_res_f32(ptr %ptr) #0 {
10851085
; CHECK-NEXT: smstart sm
10861086
; CHECK-NEXT: ptrue p0.s
10871087
; CHECK-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload
1088-
; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
10891088
; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
10901089
; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
10911090
; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
@@ -1116,7 +1115,6 @@ define void @dont_coalesce_res_f64(ptr %ptr) #0 {
11161115
; CHECK-NEXT: smstart sm
11171116
; CHECK-NEXT: ptrue p0.d
11181117
; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
1119-
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
11201118
; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
11211119
; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
11221120
; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
@@ -1151,7 +1149,6 @@ define void @dont_coalesce_res_v1i8(ptr %ptr) #0 {
11511149
; CHECK-NEXT: smstart sm
11521150
; CHECK-NEXT: ptrue p0.b
11531151
; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
1154-
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
11551152
; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
11561153
; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
11571154
; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
@@ -1183,7 +1180,6 @@ define void @dont_coalesce_res_v1i16(ptr %ptr) #0 {
11831180
; CHECK-NEXT: smstart sm
11841181
; CHECK-NEXT: ptrue p0.h
11851182
; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
1186-
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
11871183
; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
11881184
; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
11891185
; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
@@ -1215,7 +1211,6 @@ define void @dont_coalesce_res_v1i32(ptr %ptr) #0 {
12151211
; CHECK-NEXT: smstart sm
12161212
; CHECK-NEXT: ptrue p0.s
12171213
; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
1218-
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
12191214
; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
12201215
; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
12211216
; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
@@ -1247,7 +1242,6 @@ define void @dont_coalesce_res_v1i64(ptr %ptr) #0 {
12471242
; CHECK-NEXT: smstart sm
12481243
; CHECK-NEXT: ptrue p0.d
12491244
; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
1250-
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
12511245
; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
12521246
; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
12531247
; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
@@ -1311,7 +1305,6 @@ define void @dont_coalesce_res_v1f32(ptr %ptr) #0 {
13111305
; CHECK-NEXT: smstart sm
13121306
; CHECK-NEXT: ptrue p0.s
13131307
; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
1314-
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
13151308
; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
13161309
; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
13171310
; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
@@ -1343,7 +1336,6 @@ define void @dont_coalesce_res_v1f64(ptr %ptr) #0 {
13431336
; CHECK-NEXT: smstart sm
13441337
; CHECK-NEXT: ptrue p0.d
13451338
; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
1346-
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
13471339
; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
13481340
; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
13491341
; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload

llvm/test/CodeGen/AArch64/sme-streaming-body-streaming-compatible-interface.ll

Lines changed: 11 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -8,31 +8,27 @@ declare void @streaming_compatible_callee() "aarch64_pstate_sm_compatible";
88
define float @sm_body_sm_compatible_simple() "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" nounwind {
99
; CHECK-LABEL: sm_body_sm_compatible_simple:
1010
; CHECK: // %bb.0:
11-
; CHECK-NEXT: sub sp, sp, #96
12-
; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
13-
; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
14-
; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
15-
; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
16-
; CHECK-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
11+
; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
12+
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
13+
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
14+
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
15+
; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
1716
; CHECK-NEXT: bl __arm_sme_state
1817
; CHECK-NEXT: and x8, x0, #0x1
1918
; CHECK-NEXT: tbnz w8, #0, .LBB0_2
2019
; CHECK-NEXT: // %bb.1:
2120
; CHECK-NEXT: smstart sm
2221
; CHECK-NEXT: .LBB0_2:
23-
; CHECK-NEXT: fmov s0, wzr
24-
; CHECK-NEXT: str s0, [sp, #12] // 4-byte Folded Spill
2522
; CHECK-NEXT: tbnz w8, #0, .LBB0_4
2623
; CHECK-NEXT: // %bb.3:
2724
; CHECK-NEXT: smstop sm
2825
; CHECK-NEXT: .LBB0_4:
29-
; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
30-
; CHECK-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload
31-
; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
32-
; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
33-
; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
34-
; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
35-
; CHECK-NEXT: add sp, sp, #96
26+
; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
27+
; CHECK-NEXT: fmov s0, wzr
28+
; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
29+
; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
30+
; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
31+
; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
3632
; CHECK-NEXT: ret
3733
ret float zeroinitializer
3834
}

llvm/test/CodeGen/AArch64/sme-streaming-body.ll

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -247,15 +247,11 @@ define double @call_to_intrinsic_without_chain(double %x) nounwind "aarch64_psta
247247
; CHECK-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
248248
; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
249249
; CHECK-NEXT: smstart sm
250-
; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
251-
; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
252250
; CHECK-NEXT: smstop sm
253251
; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
254252
; CHECK-NEXT: bl cos
255253
; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
256254
; CHECK-NEXT: smstart sm
257-
; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
258-
; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
259255
; CHECK-NEXT: smstop sm
260256
; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
261257
; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload

llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -469,11 +469,7 @@ define void @call_to_non_streaming_pass_args(ptr nocapture noundef readnone %ptr
469469
; CHECK-NEXT: mov x9, x0
470470
; CHECK-NEXT: stp s0, s1, [sp, #8] // 8-byte Folded Spill
471471
; CHECK-NEXT: bl __arm_sme_state
472-
; CHECK-NEXT: ldp s4, s0, [sp, #8] // 8-byte Folded Reload
473472
; CHECK-NEXT: and x19, x0, #0x1
474-
; CHECK-NEXT: stp s4, s0, [sp, #8] // 8-byte Folded Spill
475-
; CHECK-NEXT: ldp d4, d0, [sp, #16] // 16-byte Folded Reload
476-
; CHECK-NEXT: stp d4, d0, [sp, #16] // 16-byte Folded Spill
477473
; CHECK-NEXT: tbz w19, #0, .LBB10_2
478474
; CHECK-NEXT: // %bb.1: // %entry
479475
; CHECK-NEXT: smstop sm

llvm/test/CodeGen/AArch64/sme-streaming-interface.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -405,11 +405,11 @@ define void @call_to_non_streaming_pass_args(ptr nocapture noundef readnone %ptr
405405
; CHECK-NEXT: stp d11, d10, [sp, #64] // 16-byte Folded Spill
406406
; CHECK-NEXT: stp d9, d8, [sp, #80] // 16-byte Folded Spill
407407
; CHECK-NEXT: str x30, [sp, #96] // 8-byte Folded Spill
408-
; CHECK-NEXT: stp s1, s0, [sp, #24] // 8-byte Folded Spill
409-
; CHECK-NEXT: stp d3, d2, [sp, #8] // 16-byte Folded Spill
408+
; CHECK-NEXT: stp d2, d3, [sp, #16] // 16-byte Folded Spill
409+
; CHECK-NEXT: stp s0, s1, [sp, #8] // 8-byte Folded Spill
410410
; CHECK-NEXT: smstop sm
411-
; CHECK-NEXT: ldp s1, s0, [sp, #24] // 8-byte Folded Reload
412-
; CHECK-NEXT: ldp d3, d2, [sp, #8] // 16-byte Folded Reload
411+
; CHECK-NEXT: ldp s0, s1, [sp, #8] // 8-byte Folded Reload
412+
; CHECK-NEXT: ldp d2, d3, [sp, #16] // 16-byte Folded Reload
413413
; CHECK-NEXT: bl bar
414414
; CHECK-NEXT: smstart sm
415415
; CHECK-NEXT: ldp d9, d8, [sp, #80] // 16-byte Folded Reload

0 commit comments

Comments
 (0)