Skip to content

Commit 93b89be

Browse files
committed
[AArch64][SVE] Fix the indexed addressing mode when FI = 0.
This is an alternative fix to D145497, which also addresses #60918.

In D124457, which added the original code for this, @efriedma pointed out that it wasn't safe to assume that FI #0 would be allocated at offset 0, but that part of the patch went in without any changes.

The downside of this solution is that any access to an object on the stack that has been allocated at SP + 0 still gets moved to a separate register first, which degrades performance.

Reviewed By: paulwalker-arm

Differential Revision: https://reviews.llvm.org/D146056
1 parent 82238fc commit 93b89be

7 files changed

+86
-72
lines changed

llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6545,7 +6545,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
65456545
int FI = cast<FrameIndexSDNode>(N)->getIndex();
65466546
// We can only encode VL scaled offsets, so only fold in frame indexes
65476547
// referencing SVE objects.
6548-
if (FI == 0 || MFI.getStackID(FI) == TargetStackID::ScalableVector) {
6548+
if (MFI.getStackID(FI) == TargetStackID::ScalableVector) {
65496549
Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
65506550
OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
65516551
return true;
@@ -6580,7 +6580,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
65806580
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
65816581
// We can only encode VL scaled offsets, so only fold in frame indexes
65826582
// referencing SVE objects.
6583-
if (FI == 0 || MFI.getStackID(FI) == TargetStackID::ScalableVector)
6583+
if (MFI.getStackID(FI) == TargetStackID::ScalableVector)
65846584
Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
65856585
}
65866586

llvm/test/CodeGen/AArch64/sve-fixed-ld2-alloca.ll

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,16 +8,19 @@ declare void @def(ptr)
88
define void @st1d_fixed(ptr %ptr) #0 {
99
; CHECK-LABEL: st1d_fixed:
1010
; CHECK: // %bb.0:
11-
; CHECK-NEXT: sub sp, sp, #144
12-
; CHECK-NEXT: stp x30, x19, [sp, #128] // 16-byte Folded Spill
11+
; CHECK-NEXT: sub sp, sp, #160
12+
; CHECK-NEXT: stp x20, x19, [sp, #144] // 16-byte Folded Spill
1313
; CHECK-NEXT: mov x19, x0
1414
; CHECK-NEXT: mov x0, sp
15+
; CHECK-NEXT: str x30, [sp, #128] // 8-byte Folded Spill
16+
; CHECK-NEXT: mov x20, sp
1517
; CHECK-NEXT: bl def
1618
; CHECK-NEXT: ptrue p0.d
17-
; CHECK-NEXT: ld2d { z0.d, z1.d }, p0/z, [sp]
19+
; CHECK-NEXT: ld2d { z0.d, z1.d }, p0/z, [x20]
20+
; CHECK-NEXT: ldr x30, [sp, #128] // 8-byte Folded Reload
1821
; CHECK-NEXT: st1d { z0.d }, p0, [x19]
19-
; CHECK-NEXT: ldp x30, x19, [sp, #128] // 16-byte Folded Reload
20-
; CHECK-NEXT: add sp, sp, #144
22+
; CHECK-NEXT: ldp x20, x19, [sp, #144] // 16-byte Folded Reload
23+
; CHECK-NEXT: add sp, sp, #160
2124
; CHECK-NEXT: ret
2225
%alloc = alloca [16 x double]
2326
call void @def(ptr %alloc)

llvm/test/CodeGen/AArch64/sve-fixed-length-frame-offests.ll

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,14 @@ target triple = "aarch64-unknown-linux-gnu"
99
; accessing fixed width objects.
1010
define void @foo(ptr %a) #0 {
1111
; CHECK-LABEL: foo:
12-
; CHECK: SelectionDAG has 14 nodes:
12+
; CHECK: SelectionDAG has 15 nodes:
1313
; CHECK-NEXT: t0: ch,glue = EntryToken
1414
; CHECK-NEXT: t12: nxv2i1 = PTRUE_D TargetConstant:i32<31>
1515
; CHECK-NEXT: t2: i64,ch = CopyFromReg t0, Register:i64 %0
1616
; CHECK-NEXT: t18: nxv2i64,ch = LD1D_IMM<Mem:(volatile load (s512) from %ir.a)> t12, t2, TargetConstant:i64<0>, t0
1717
; CHECK-NEXT: t8: i64 = ADDXri TargetFrameIndex:i64<1>, TargetConstant:i32<0>, TargetConstant:i32<0>
18-
; CHECK-NEXT: t17: ch = ST1D_IMM<Mem:(volatile store (s512) into %ir.r0)> t18, t12, TargetFrameIndex:i64<0>, TargetConstant:i64<0>, t18:1
18+
; CHECK-NEXT: t6: i64 = ADDXri TargetFrameIndex:i64<0>, TargetConstant:i32<0>, TargetConstant:i32<0>
19+
; CHECK-NEXT: t17: ch = ST1D_IMM<Mem:(volatile store (s512) into %ir.r0)> t18, t12, t6, TargetConstant:i64<0>, t18:1
1920
; CHECK-NEXT: t16: ch = ST1D_IMM<Mem:(volatile store (s512) into %ir.r1)> t18, t12, t8, TargetConstant:i64<0>, t17
2021
; CHECK-NEXT: t10: ch = RET_ReallyLR t16
2122
; CHECK-EMPTY:

llvm/test/CodeGen/AArch64/sve-fixed-length-permute-rev.ll

Lines changed: 42 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -205,14 +205,15 @@ define void @test_rev_elts_fail(ptr %a) #1 {
205205
; CHECK-NEXT: ptrue p0.d
206206
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
207207
; CHECK-NEXT: mov z1.d, z0.d[2]
208-
; CHECK-NEXT: mov z2.d, z0.d[3]
209-
; CHECK-NEXT: mov x10, v0.d[1]
210-
; CHECK-NEXT: fmov x8, d1
211-
; CHECK-NEXT: fmov x9, d2
212208
; CHECK-NEXT: fmov x11, d0
209+
; CHECK-NEXT: fmov x8, d1
210+
; CHECK-NEXT: mov z1.d, z0.d[3]
211+
; CHECK-NEXT: fmov x9, d1
212+
; CHECK-NEXT: mov x10, v0.d[1]
213213
; CHECK-NEXT: stp x9, x8, [sp, #16]
214+
; CHECK-NEXT: mov x8, sp
214215
; CHECK-NEXT: stp x10, x11, [sp]
215-
; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp]
216+
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8]
216217
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
217218
; CHECK-NEXT: mov sp, x29
218219
; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
@@ -273,9 +274,9 @@ define void @test_revv8i32(ptr %a) #0 {
273274
; CHECK-NEXT: ptrue p0.s, vl8
274275
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
275276
; CHECK-NEXT: mov w8, v0.s[1]
277+
; CHECK-NEXT: fmov w10, s0
276278
; CHECK-NEXT: mov w9, v0.s[2]
277279
; CHECK-NEXT: mov w11, v0.s[3]
278-
; CHECK-NEXT: fmov w10, s0
279280
; CHECK-NEXT: mov z1.s, z0.s[4]
280281
; CHECK-NEXT: mov z2.s, z0.s[5]
281282
; CHECK-NEXT: mov z3.s, z0.s[6]
@@ -287,8 +288,9 @@ define void @test_revv8i32(ptr %a) #0 {
287288
; CHECK-NEXT: fmov w9, s3
288289
; CHECK-NEXT: fmov w11, s0
289290
; CHECK-NEXT: stp w8, w10, [sp, #8]
291+
; CHECK-NEXT: mov x8, sp
290292
; CHECK-NEXT: stp w11, w9, [sp]
291-
; CHECK-NEXT: ld1w { z0.s }, p0/z, [sp]
293+
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8]
292294
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
293295
; CHECK-NEXT: mov sp, x29
294296
; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
@@ -392,44 +394,45 @@ define void @test_rev_fail(ptr %a) #1 {
392394
; CHECK-NEXT: mov z1.h, z0.h[8]
393395
; CHECK-NEXT: fmov w8, s0
394396
; CHECK-NEXT: fmov w9, s1
397+
; CHECK-NEXT: mov z4.h, z0.h[11]
395398
; CHECK-NEXT: mov z5.h, z0.h[12]
396399
; CHECK-NEXT: mov z2.h, z0.h[9]
400+
; CHECK-NEXT: strh w8, [sp, #14]
401+
; CHECK-NEXT: fmov w8, s4
397402
; CHECK-NEXT: mov z3.h, z0.h[10]
398-
; CHECK-NEXT: mov z4.h, z0.h[11]
399-
; CHECK-NEXT: fmov w11, s2
400403
; CHECK-NEXT: strh w9, [sp, #30]
401404
; CHECK-NEXT: fmov w9, s5
405+
; CHECK-NEXT: mov z16.h, z0.h[15]
406+
; CHECK-NEXT: fmov w11, s2
402407
; CHECK-NEXT: fmov w12, s3
403-
; CHECK-NEXT: strh w8, [sp, #14]
404-
; CHECK-NEXT: fmov w8, s4
408+
; CHECK-NEXT: strh w8, [sp, #24]
409+
; CHECK-NEXT: fmov w8, s16
405410
; CHECK-NEXT: mov z6.h, z0.h[13]
406411
; CHECK-NEXT: mov z7.h, z0.h[14]
407-
; CHECK-NEXT: mov z16.h, z0.h[15]
408412
; CHECK-NEXT: umov w10, v0.h[1]
409413
; CHECK-NEXT: strh w9, [sp, #22]
410414
; CHECK-NEXT: umov w9, v0.h[2]
411415
; CHECK-NEXT: strh w11, [sp, #28]
412416
; CHECK-NEXT: fmov w11, s6
413417
; CHECK-NEXT: strh w12, [sp, #26]
414418
; CHECK-NEXT: fmov w12, s7
415-
; CHECK-NEXT: strh w8, [sp, #24]
416-
; CHECK-NEXT: fmov w8, s16
419+
; CHECK-NEXT: strh w8, [sp, #16]
420+
; CHECK-NEXT: umov w8, v0.h[5]
417421
; CHECK-NEXT: strh w10, [sp, #12]
418422
; CHECK-NEXT: strh w11, [sp, #20]
419423
; CHECK-NEXT: umov w11, v0.h[3]
420424
; CHECK-NEXT: strh w12, [sp, #18]
421425
; CHECK-NEXT: umov w12, v0.h[4]
422-
; CHECK-NEXT: strh w8, [sp, #16]
423-
; CHECK-NEXT: umov w8, v0.h[5]
424426
; CHECK-NEXT: umov w10, v0.h[6]
425427
; CHECK-NEXT: strh w9, [sp, #10]
426428
; CHECK-NEXT: umov w9, v0.h[7]
429+
; CHECK-NEXT: strh w8, [sp, #4]
430+
; CHECK-NEXT: mov x8, sp
427431
; CHECK-NEXT: strh w11, [sp, #8]
428432
; CHECK-NEXT: strh w12, [sp, #6]
429-
; CHECK-NEXT: strh w8, [sp, #4]
430433
; CHECK-NEXT: strh w10, [sp, #2]
431434
; CHECK-NEXT: strh w9, [sp]
432-
; CHECK-NEXT: ld1h { z0.h }, p0/z, [sp]
435+
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8]
433436
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
434437
; CHECK-NEXT: mov sp, x29
435438
; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
@@ -457,39 +460,39 @@ define void @test_revv8i16v8i16(ptr %a, ptr %b, ptr %c) #1 {
457460
; CHECK-NEXT: orr x9, x8, #0x1e
458461
; CHECK-NEXT: orr x10, x8, #0x1c
459462
; CHECK-NEXT: ldr q1, [x0]
460-
; CHECK-NEXT: orr x12, x8, #0x10
461463
; CHECK-NEXT: orr x11, x8, #0x18
464+
; CHECK-NEXT: orr x12, x8, #0x10
462465
; CHECK-NEXT: str h0, [sp, #22]
463466
; CHECK-NEXT: st1 { v0.h }[4], [x9]
464467
; CHECK-NEXT: orr x9, x8, #0xe
465468
; CHECK-NEXT: st1 { v0.h }[5], [x10]
466469
; CHECK-NEXT: orr x10, x8, #0xc
467-
; CHECK-NEXT: st1 { v0.h }[3], [x12]
468-
; CHECK-NEXT: mov w12, #26
469-
; CHECK-NEXT: st1 { v1.h }[4], [x9]
470-
; CHECK-NEXT: orr x9, x8, #0x8
471470
; CHECK-NEXT: st1 { v0.h }[7], [x11]
472-
; CHECK-NEXT: orr x11, x8, #0x2
471+
; CHECK-NEXT: orr x11, x8, #0x8
472+
; CHECK-NEXT: st1 { v1.h }[4], [x9]
473+
; CHECK-NEXT: orr x9, x8, #0x4
473474
; CHECK-NEXT: st1 { v1.h }[5], [x10]
474-
; CHECK-NEXT: orr x10, x8, #0x4
475-
; CHECK-NEXT: st1 { v1.h }[7], [x9]
475+
; CHECK-NEXT: mov w10, #26
476+
; CHECK-NEXT: orr x10, x8, x10
477+
; CHECK-NEXT: st1 { v0.h }[3], [x12]
478+
; CHECK-NEXT: st1 { v1.h }[1], [x9]
479+
; CHECK-NEXT: orr x9, x8, #0x2
480+
; CHECK-NEXT: st1 { v1.h }[7], [x11]
481+
; CHECK-NEXT: mov w11, #20
482+
; CHECK-NEXT: mov w12, #18
483+
; CHECK-NEXT: st1 { v0.h }[6], [x10]
484+
; CHECK-NEXT: mov w10, #10
485+
; CHECK-NEXT: orr x11, x8, x11
486+
; CHECK-NEXT: st1 { v1.h }[2], [x9]
476487
; CHECK-NEXT: orr x9, x8, x12
477-
; CHECK-NEXT: st1 { v1.h }[2], [x11]
478-
; CHECK-NEXT: mov w11, #10
479-
; CHECK-NEXT: st1 { v1.h }[1], [x10]
480-
; CHECK-NEXT: mov w10, #18
481-
; CHECK-NEXT: st1 { v0.h }[6], [x9]
482-
; CHECK-NEXT: mov w9, #20
483-
; CHECK-NEXT: orr x9, x8, x9
484488
; CHECK-NEXT: orr x10, x8, x10
485489
; CHECK-NEXT: st1 { v1.h }[3], [x8]
486-
; CHECK-NEXT: orr x8, x8, x11
487-
; CHECK-NEXT: str h1, [sp, #6]
490+
; CHECK-NEXT: st1 { v0.h }[1], [x11]
488491
; CHECK-NEXT: ptrue p0.h
489-
; CHECK-NEXT: st1 { v0.h }[1], [x9]
490-
; CHECK-NEXT: st1 { v0.h }[2], [x10]
491-
; CHECK-NEXT: st1 { v1.h }[6], [x8]
492-
; CHECK-NEXT: ld1h { z0.h }, p0/z, [sp]
492+
; CHECK-NEXT: st1 { v0.h }[2], [x9]
493+
; CHECK-NEXT: st1 { v1.h }[6], [x10]
494+
; CHECK-NEXT: str h1, [sp, #6]
495+
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8]
493496
; CHECK-NEXT: st1h { z0.h }, p0, [x2]
494497
; CHECK-NEXT: mov sp, x29
495498
; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload

llvm/test/CodeGen/AArch64/sve-fixed-length-permute-zip-uzp-trn.ll

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,7 @@ define void @zip_v4f64(ptr %a, ptr %b) #0 {
140140
; VBITS_EQ_512-NEXT: sub x9, sp, #48
141141
; VBITS_EQ_512-NEXT: and sp, x9, #0xffffffffffffffe0
142142
; VBITS_EQ_512-NEXT: ptrue p0.d, vl4
143+
; VBITS_EQ_512-NEXT: mov x8, sp
143144
; VBITS_EQ_512-NEXT: ld1d { z0.d }, p0/z, [x0]
144145
; VBITS_EQ_512-NEXT: ld1d { z1.d }, p0/z, [x1]
145146
; VBITS_EQ_512-NEXT: mov z2.d, z1.d[3]
@@ -149,7 +150,7 @@ define void @zip_v4f64(ptr %a, ptr %b) #0 {
149150
; VBITS_EQ_512-NEXT: mov z3.d, z0.d[2]
150151
; VBITS_EQ_512-NEXT: zip1 z0.d, z0.d, z1.d
151152
; VBITS_EQ_512-NEXT: stp d3, d2, [sp]
152-
; VBITS_EQ_512-NEXT: ld1d { z2.d }, p0/z, [sp]
153+
; VBITS_EQ_512-NEXT: ld1d { z2.d }, p0/z, [x8]
153154
; VBITS_EQ_512-NEXT: fadd z0.d, p0/m, z0.d, z2.d
154155
; VBITS_EQ_512-NEXT: st1d { z0.d }, p0, [x0]
155156
; VBITS_EQ_512-NEXT: mov sp, x29
@@ -657,6 +658,7 @@ define void @zip_vscale2_4(ptr %a, ptr %b) #2 {
657658
; CHECK-NEXT: sub x9, sp, #48
658659
; CHECK-NEXT: and sp, x9, #0xffffffffffffffe0
659660
; CHECK-NEXT: ptrue p0.d, vl4
661+
; CHECK-NEXT: mov x8, sp
660662
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
661663
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
662664
; CHECK-NEXT: mov z2.d, z1.d[3]
@@ -666,7 +668,7 @@ define void @zip_vscale2_4(ptr %a, ptr %b) #2 {
666668
; CHECK-NEXT: mov z3.d, z0.d[2]
667669
; CHECK-NEXT: zip1 z0.d, z0.d, z1.d
668670
; CHECK-NEXT: stp d3, d2, [sp]
669-
; CHECK-NEXT: ld1d { z2.d }, p0/z, [sp]
671+
; CHECK-NEXT: ld1d { z2.d }, p0/z, [x8]
670672
; CHECK-NEXT: fadd z0.d, p0/m, z0.d, z2.d
671673
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
672674
; CHECK-NEXT: mov sp, x29

llvm/test/CodeGen/AArch64/sve-fixed-length-vector-shuffle.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -918,14 +918,15 @@ define void @shuffle_ext_invalid(ptr %a, ptr %b) vscale_range(2,0) #0 {
918918
; CHECK-NEXT: sub x9, sp, #48
919919
; CHECK-NEXT: and sp, x9, #0xffffffffffffffe0
920920
; CHECK-NEXT: ptrue p0.d, vl4
921+
; CHECK-NEXT: mov x8, sp
921922
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
922923
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
923924
; CHECK-NEXT: mov z2.d, z1.d[1]
924925
; CHECK-NEXT: stp d1, d2, [sp, #16]
925926
; CHECK-NEXT: mov z1.d, z0.d[3]
926927
; CHECK-NEXT: mov z0.d, z0.d[2]
927928
; CHECK-NEXT: stp d0, d1, [sp]
928-
; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp]
929+
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8]
929930
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
930931
; CHECK-NEXT: mov sp, x29
931932
; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload

llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll

Lines changed: 25 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -8,23 +8,25 @@ declare void @def(ptr)
88
define void @alloc_v4i8(ptr %st_ptr) #0 {
99
; CHECK-LABEL: alloc_v4i8:
1010
; CHECK: // %bb.0:
11-
; CHECK-NEXT: sub sp, sp, #32
12-
; CHECK-NEXT: stp x30, x19, [sp, #16] // 16-byte Folded Spill
11+
; CHECK-NEXT: sub sp, sp, #48
12+
; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
1313
; CHECK-NEXT: mov x19, x0
14-
; CHECK-NEXT: add x0, sp, #12
14+
; CHECK-NEXT: add x0, sp, #28
15+
; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
16+
; CHECK-NEXT: add x20, sp, #28
1517
; CHECK-NEXT: bl def
16-
; CHECK-NEXT: add x8, sp, #12
1718
; CHECK-NEXT: ptrue p0.b, vl2
18-
; CHECK-NEXT: ld2b { z0.b, z1.b }, p0/z, [x8]
19+
; CHECK-NEXT: ld2b { z0.b, z1.b }, p0/z, [x20]
1920
; CHECK-NEXT: ptrue p0.s, vl2
21+
; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
2022
; CHECK-NEXT: mov z2.b, z0.b[1]
2123
; CHECK-NEXT: fmov w8, s0
2224
; CHECK-NEXT: fmov w9, s2
23-
; CHECK-NEXT: stp w8, w9, [sp]
24-
; CHECK-NEXT: ldr d0, [sp]
25+
; CHECK-NEXT: stp w8, w9, [sp, #8]
26+
; CHECK-NEXT: ldr d0, [sp, #8]
2527
; CHECK-NEXT: st1b { z0.s }, p0, [x19]
26-
; CHECK-NEXT: ldp x30, x19, [sp, #16] // 16-byte Folded Reload
27-
; CHECK-NEXT: add sp, sp, #32
28+
; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
29+
; CHECK-NEXT: add sp, sp, #48
2830
; CHECK-NEXT: ret
2931
%alloc = alloca [4 x i8]
3032
call void @def(ptr %alloc)
@@ -38,32 +40,34 @@ define void @alloc_v6i8(ptr %st_ptr) #0 {
3840
; CHECK-LABEL: alloc_v6i8:
3941
; CHECK: // %bb.0:
4042
; CHECK-NEXT: sub sp, sp, #48
41-
; CHECK-NEXT: stp x30, x19, [sp, #32] // 16-byte Folded Spill
43+
; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
4244
; CHECK-NEXT: mov x19, x0
4345
; CHECK-NEXT: add x0, sp, #24
46+
; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
47+
; CHECK-NEXT: add x20, sp, #24
4448
; CHECK-NEXT: bl def
45-
; CHECK-NEXT: add x8, sp, #24
4649
; CHECK-NEXT: ptrue p0.b, vl3
47-
; CHECK-NEXT: ld2b { z0.b, z1.b }, p0/z, [x8]
50+
; CHECK-NEXT: ld2b { z0.b, z1.b }, p0/z, [x20]
4851
; CHECK-NEXT: ptrue p0.h, vl4
4952
; CHECK-NEXT: fmov w8, s1
5053
; CHECK-NEXT: mov z2.b, z1.b[3]
5154
; CHECK-NEXT: mov z3.b, z1.b[2]
5255
; CHECK-NEXT: mov z0.b, z1.b[1]
5356
; CHECK-NEXT: fmov w9, s2
5457
; CHECK-NEXT: fmov w10, s3
55-
; CHECK-NEXT: strh w8, [sp, #8]
58+
; CHECK-NEXT: strh w8, [sp]
5659
; CHECK-NEXT: fmov w8, s0
57-
; CHECK-NEXT: strh w9, [sp, #14]
58-
; CHECK-NEXT: strh w10, [sp, #12]
59-
; CHECK-NEXT: strh w8, [sp, #10]
60-
; CHECK-NEXT: add x8, sp, #20
61-
; CHECK-NEXT: ldr d0, [sp, #8]
60+
; CHECK-NEXT: strh w9, [sp, #6]
61+
; CHECK-NEXT: strh w10, [sp, #4]
62+
; CHECK-NEXT: strh w8, [sp, #2]
63+
; CHECK-NEXT: add x8, sp, #12
64+
; CHECK-NEXT: ldr d0, [sp]
6265
; CHECK-NEXT: st1b { z0.h }, p0, [x8]
63-
; CHECK-NEXT: ldrh w8, [sp, #20]
66+
; CHECK-NEXT: ldrh w8, [sp, #12]
6467
; CHECK-NEXT: strb w10, [x19, #2]
68+
; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
6569
; CHECK-NEXT: strh w8, [x19]
66-
; CHECK-NEXT: ldp x30, x19, [sp, #32] // 16-byte Folded Reload
70+
; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
6771
; CHECK-NEXT: add sp, sp, #48
6872
; CHECK-NEXT: ret
6973
%alloc = alloca [6 x i8]
@@ -135,7 +139,7 @@ define void @alloc_v8f64(ptr %st_ptr) #0 {
135139
; CHECK-NEXT: bl def
136140
; CHECK-NEXT: mov x8, #4
137141
; CHECK-NEXT: ptrue p0.d, vl2
138-
; CHECK-NEXT: ld2d { z0.d, z1.d }, p0/z, [sp]
142+
; CHECK-NEXT: ld2d { z0.d, z1.d }, p0/z, [x20]
139143
; CHECK-NEXT: ld2d { z2.d, z3.d }, p0/z, [x20, x8, lsl #3]
140144
; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
141145
; CHECK-NEXT: stp q0, q2, [x19]

0 commit comments

Comments
 (0)