Skip to content

Commit f2bab3a

Browse files
CarolineConcatto authored and wwwatermiao committed
[AArch64] Restore Z-registers before P-registers (#79623) (#82492)
This is needed by PR #77665 [1], which uses a P-register while restoring Z-registers. The reversed order for SVE register restores in the epilogue was added to guarantee performance, but further work has since been done to improve SVE frame restores and, besides that, the scheduler may also change the order of the restores, undoing the reversal. This also fixes the problem reported in PR #79623 on Windows with std::reverse and .base(). [1] llvm/llvm-project#77665 Signed-off-by: chenmiao <[email protected]> Signed-off-by: chenmiao <[email protected]>
1 parent 079b5b6 commit f2bab3a

9 files changed

+179
-178
lines changed

llvm/lib/Target/AArch64/AArch64FrameLowering.cpp

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2978,11 +2978,6 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
29782978
return MIB->getIterator();
29792979
};
29802980

2981-
// SVE objects are always restored in reverse order.
2982-
for (const RegPairInfo &RPI : reverse(RegPairs))
2983-
if (RPI.isScalable())
2984-
EmitMI(RPI);
2985-
29862981
if (homogeneousPrologEpilog(MF, &MBB)) {
29872982
auto MIB = BuildMI(MBB, MBBI, DL, TII.get(AArch64::HOM_Epilog))
29882983
.setMIFlag(MachineInstr::FrameDestroy);
@@ -2993,11 +2988,19 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
29932988
return true;
29942989
}
29952990

2991+
// For performance reasons restore SVE register in increasing order
2992+
auto IsPPR = [](const RegPairInfo &c) { return c.Type == RegPairInfo::PPR; };
2993+
auto PPRBegin = std::find_if(RegPairs.begin(), RegPairs.end(), IsPPR);
2994+
auto PPREnd = std::find_if_not(PPRBegin, RegPairs.end(), IsPPR);
2995+
std::reverse(PPRBegin, PPREnd);
2996+
auto IsZPR = [](const RegPairInfo &c) { return c.Type == RegPairInfo::ZPR; };
2997+
auto ZPRBegin = std::find_if(RegPairs.begin(), RegPairs.end(), IsZPR);
2998+
auto ZPREnd = std::find_if_not(ZPRBegin, RegPairs.end(), IsZPR);
2999+
std::reverse(ZPRBegin, ZPREnd);
3000+
29963001
if (ReverseCSRRestoreSeq) {
29973002
MachineBasicBlock::iterator First = MBB.end();
29983003
for (const RegPairInfo &RPI : reverse(RegPairs)) {
2999-
if (RPI.isScalable())
3000-
continue;
30013004
MachineBasicBlock::iterator It = EmitMI(RPI);
30023005
if (First == MBB.end())
30033006
First = It;
@@ -3006,8 +3009,6 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
30063009
MBB.splice(MBBI, &MBB, First);
30073010
} else {
30083011
for (const RegPairInfo &RPI : RegPairs) {
3009-
if (RPI.isScalable())
3010-
continue;
30113012
(void)EmitMI(RPI);
30123013
}
30133014
}

llvm/test/CodeGen/AArch64/framelayout-sve-calleesaves-fix.mir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@
1919
; CHECK-NEXT: // implicit-def: $p4
2020
; CHECK-NEXT: addvl sp, sp, #1
2121
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
22-
; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
2322
; CHECK-NEXT: ldr z8, [sp, #1, mul vl] // 16-byte Folded Reload
23+
; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
2424
; CHECK-NEXT: addvl sp, sp, #2
2525
; CHECK-NEXT: .cfi_def_cfa wsp, 16
2626
; CHECK-NEXT: .cfi_restore z8

llvm/test/CodeGen/AArch64/framelayout-sve.mir

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -771,9 +771,9 @@ body: |
771771

772772
# CHECK: $sp = frame-destroy ADDXri $sp, 32, 0
773773
# CHECK-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22
774-
# CHECK-NEXT: $z10 = frame-destroy LDR_ZXI $sp, 0
774+
# CHECK-NEXT: $z10 = frame-destroy LDR_ZXI $sp, 0
775775
# CHECK-NEXT: $z9 = frame-destroy LDR_ZXI $sp, 1
776-
# CHECK-NEXT: $z8 = frame-destroy LDR_ZXI $sp, 2
776+
# CHECK-NEXT: $z8 = frame-destroy LDR_ZXI $sp, 2
777777
# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 3
778778
# CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa $wsp, 16
779779
# CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $z8
@@ -872,14 +872,14 @@ body: |
872872
# CHECK-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x98, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22
873873
# CHECK: $sp = frame-destroy ADDVL_XXI $sp, 1
874874
# CHECK-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22
875-
# CHECK: $p15 = frame-destroy LDR_PXI $sp, 4
876-
# CHECK: $p14 = frame-destroy LDR_PXI $sp, 5
877-
# CHECK: $p5 = frame-destroy LDR_PXI $sp, 14
878-
# CHECK: $p4 = frame-destroy LDR_PXI $sp, 15
879875
# CHECK: $z23 = frame-destroy LDR_ZXI $sp, 2
880876
# CHECK: $z22 = frame-destroy LDR_ZXI $sp, 3
881877
# CHECK: $z9 = frame-destroy LDR_ZXI $sp, 16
882878
# CHECK: $z8 = frame-destroy LDR_ZXI $sp, 17
879+
# CHECK: $p15 = frame-destroy LDR_PXI $sp, 4
880+
# CHECK: $p14 = frame-destroy LDR_PXI $sp, 5
881+
# CHECK: $p5 = frame-destroy LDR_PXI $sp, 14
882+
# CHECK: $p4 = frame-destroy LDR_PXI $sp, 15
883883
# CHECK: $sp = frame-destroy ADDVL_XXI $sp, 18
884884
# CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa $wsp, 32
885885
# CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $z8
@@ -1036,14 +1036,14 @@ body: |
10361036
# CHECK-NEXT: $sp = ANDXri killed $[[TMP]]
10371037

10381038
# CHECK: $sp = frame-destroy ADDVL_XXI $fp, -18
1039+
# CHECK: $z23 = frame-destroy LDR_ZXI $sp, 2
1040+
# CHECK-NEXT: $z22 = frame-destroy LDR_ZXI $sp, 3
1041+
# CHECK: $z9 = frame-destroy LDR_ZXI $sp, 16
1042+
# CHECK-NEXT: $z8 = frame-destroy LDR_ZXI $sp, 17
10391043
# CHECK-NEXT: $p15 = frame-destroy LDR_PXI $sp, 4
10401044
# CHECK-NEXT: $p14 = frame-destroy LDR_PXI $sp, 5
10411045
# CHECK: $p5 = frame-destroy LDR_PXI $sp, 14
10421046
# CHECK-NEXT: $p4 = frame-destroy LDR_PXI $sp, 15
1043-
# CHECK-NEXT: $z23 = frame-destroy LDR_ZXI $sp, 2
1044-
# CHECK-NEXT: $z22 = frame-destroy LDR_ZXI $sp, 3
1045-
# CHECK: $z9 = frame-destroy LDR_ZXI $sp, 16
1046-
# CHECK-NEXT: $z8 = frame-destroy LDR_ZXI $sp, 17
10471047
# CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $z8
10481048
# CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $z9
10491049
# CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $z10
@@ -1197,10 +1197,10 @@ body: |
11971197

11981198
# CHECK: $sp = frame-destroy ADDVL_XXI $sp, 7
11991199
# CHECK-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22
1200-
# CHECK-NEXT: $p15 = frame-destroy LDR_PXI $sp, 6
1201-
# CHECK-NEXT: $p4 = frame-destroy LDR_PXI $sp, 7
12021200
# CHECK-NEXT: $z23 = frame-destroy LDR_ZXI $sp, 1
12031201
# CHECK-NEXT: $z8 = frame-destroy LDR_ZXI $sp, 2
1202+
# CHECK-NEXT: $p15 = frame-destroy LDR_PXI $sp, 6
1203+
# CHECK-NEXT: $p4 = frame-destroy LDR_PXI $sp, 7
12041204
# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 3
12051205
# CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa $wsp, 16
12061206
# CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $z8

llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll

Lines changed: 24 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -226,18 +226,6 @@ define <vscale x 2 x double> @streaming_compatible_with_scalable_vectors(<vscale
226226
; CHECK-NEXT: ldr z1, [sp] // 16-byte Folded Reload
227227
; CHECK-NEXT: fadd z0.d, z1.d, z0.d
228228
; CHECK-NEXT: addvl sp, sp, #2
229-
; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
230-
; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
231-
; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
232-
; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
233-
; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
234-
; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
235-
; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
236-
; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
237-
; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
238-
; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
239-
; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
240-
; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
241229
; CHECK-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
242230
; CHECK-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
243231
; CHECK-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
@@ -254,6 +242,18 @@ define <vscale x 2 x double> @streaming_compatible_with_scalable_vectors(<vscale
254242
; CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
255243
; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
256244
; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
245+
; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
246+
; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
247+
; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
248+
; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
249+
; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
250+
; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
251+
; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
252+
; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
253+
; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
254+
; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
255+
; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
256+
; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
257257
; CHECK-NEXT: addvl sp, sp, #18
258258
; CHECK-NEXT: ldp x30, x19, [sp, #16] // 16-byte Folded Reload
259259
; CHECK-NEXT: ldr x29, [sp], #32 // 8-byte Folded Reload
@@ -318,18 +318,6 @@ define <vscale x 2 x i1> @streaming_compatible_with_predicate_vectors(<vscale x
318318
; CHECK-NEXT: ldr p1, [sp, #6, mul vl] // 2-byte Folded Reload
319319
; CHECK-NEXT: and p0.b, p1/z, p1.b, p0.b
320320
; CHECK-NEXT: addvl sp, sp, #1
321-
; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
322-
; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
323-
; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
324-
; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
325-
; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
326-
; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
327-
; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
328-
; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
329-
; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
330-
; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
331-
; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
332-
; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
333321
; CHECK-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
334322
; CHECK-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
335323
; CHECK-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
@@ -346,6 +334,18 @@ define <vscale x 2 x i1> @streaming_compatible_with_predicate_vectors(<vscale x
346334
; CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
347335
; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
348336
; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
337+
; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
338+
; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
339+
; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
340+
; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
341+
; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
342+
; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
343+
; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
344+
; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
345+
; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
346+
; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
347+
; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
348+
; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
349349
; CHECK-NEXT: addvl sp, sp, #18
350350
; CHECK-NEXT: ldp x30, x19, [sp, #16] // 16-byte Folded Reload
351351
; CHECK-NEXT: ldr x29, [sp], #32 // 8-byte Folded Reload

llvm/test/CodeGen/AArch64/sme-streaming-interface.ll

Lines changed: 24 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -187,18 +187,6 @@ define <vscale x 4 x i32> @smstart_clobber_sve(<vscale x 4 x i32> %x) #0 {
187187
; CHECK-NEXT: smstop sm
188188
; CHECK-NEXT: ldr z0, [sp] // 16-byte Folded Reload
189189
; CHECK-NEXT: addvl sp, sp, #1
190-
; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
191-
; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
192-
; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
193-
; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
194-
; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
195-
; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
196-
; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
197-
; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
198-
; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
199-
; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
200-
; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
201-
; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
202190
; CHECK-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
203191
; CHECK-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
204192
; CHECK-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
@@ -215,6 +203,18 @@ define <vscale x 4 x i32> @smstart_clobber_sve(<vscale x 4 x i32> %x) #0 {
215203
; CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
216204
; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
217205
; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
206+
; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
207+
; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
208+
; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
209+
; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
210+
; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
211+
; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
212+
; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
213+
; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
214+
; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
215+
; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
216+
; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
217+
; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
218218
; CHECK-NEXT: addvl sp, sp, #18
219219
; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
220220
; CHECK-NEXT: ret
@@ -267,18 +267,6 @@ define <vscale x 4 x i32> @smstart_clobber_sve_duplicate(<vscale x 4 x i32> %x)
267267
; CHECK-NEXT: smstop sm
268268
; CHECK-NEXT: ldr z0, [sp] // 16-byte Folded Reload
269269
; CHECK-NEXT: addvl sp, sp, #1
270-
; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
271-
; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
272-
; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
273-
; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
274-
; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
275-
; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
276-
; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
277-
; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
278-
; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
279-
; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
280-
; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
281-
; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
282270
; CHECK-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
283271
; CHECK-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
284272
; CHECK-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
@@ -295,6 +283,18 @@ define <vscale x 4 x i32> @smstart_clobber_sve_duplicate(<vscale x 4 x i32> %x)
295283
; CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
296284
; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
297285
; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
286+
; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
287+
; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
288+
; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
289+
; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
290+
; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
291+
; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
292+
; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
293+
; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
294+
; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
295+
; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
296+
; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
297+
; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
298298
; CHECK-NEXT: addvl sp, sp, #18
299299
; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
300300
; CHECK-NEXT: ret

llvm/test/CodeGen/AArch64/sve-alloca.ll

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -66,18 +66,6 @@ define void @foo(<vscale x 4 x i64> %dst, i1 %cond) {
6666
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
6767
; CHECK-NEXT: bl bar
6868
; CHECK-NEXT: addvl sp, x29, #-18
69-
; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
70-
; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
71-
; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
72-
; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
73-
; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
74-
; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
75-
; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
76-
; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
77-
; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
78-
; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
79-
; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
80-
; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
8169
; CHECK-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
8270
; CHECK-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
8371
; CHECK-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
@@ -94,6 +82,18 @@ define void @foo(<vscale x 4 x i64> %dst, i1 %cond) {
9482
; CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
9583
; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
9684
; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
85+
; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
86+
; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
87+
; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
88+
; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
89+
; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
90+
; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
91+
; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
92+
; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
93+
; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
94+
; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
95+
; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
96+
; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
9797
; CHECK-NEXT: mov sp, x29
9898
; CHECK-NEXT: ldp x28, x19, [sp, #16] // 16-byte Folded Reload
9999
; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload

0 commit comments

Comments
 (0)