|
3 | 3 |
|
4 | 4 | target triple = "aarch64-unknown-linux-gnu"
|
5 | 5 |
|
| 6 | +declare void @def(ptr) |
| 7 | + |
6 | 8 | define void @st1d_fixed(ptr %ptr) #0 {
|
7 | 9 | ; CHECK-LABEL: st1d_fixed:
|
8 | 10 | ; CHECK: // %bb.0:
|
9 |
| -; CHECK-NEXT: sub sp, sp, #16 |
10 |
| -; CHECK-NEXT: add x8, sp, #8 |
| 11 | +; CHECK-NEXT: sub sp, sp, #144 |
| 12 | +; CHECK-NEXT: stp x30, x19, [sp, #128] // 16-byte Folded Spill |
| 13 | +; CHECK-NEXT: mov x19, x0 |
| 14 | +; CHECK-NEXT: mov x0, sp |
| 15 | +; CHECK-NEXT: bl def |
11 | 16 | ; CHECK-NEXT: ptrue p0.d
|
12 |
| -; CHECK-NEXT: ld2d { z0.d, z1.d }, p0/z, [x8] |
13 |
| -; CHECK-NEXT: mov x8, #4 |
14 |
| -; CHECK-NEXT: mov z0.d, #0 // =0x0 |
15 |
| -; CHECK-NEXT: st1d { z0.d }, p0, [x0] |
16 |
| -; CHECK-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3] |
17 |
| -; CHECK-NEXT: add sp, sp, #16 |
| 17 | +; CHECK-NEXT: ld2d { z0.d, z1.d }, p0/z, [sp] |
| 18 | +; CHECK-NEXT: st1d { z0.d }, p0, [x19] |
| 19 | +; CHECK-NEXT: ldp x30, x19, [sp, #128] // 16-byte Folded Reload |
| 20 | +; CHECK-NEXT: add sp, sp, #144 |
18 | 21 | ; CHECK-NEXT: ret
|
19 |
| - %alloc = alloca [16 x double], i32 0 |
| 22 | + %alloc = alloca [16 x double] |
| 23 | + call void @def(ptr %alloc) |
20 | 24 | %load = load <8 x double>, ptr %alloc
|
21 | 25 | %strided.vec = shufflevector <8 x double> %load, <8 x double> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
|
22 |
| - store <8 x double> zeroinitializer, ptr %ptr |
| 26 | + store <4 x double> %strided.vec, ptr %ptr |
23 | 27 | ret void
|
24 | 28 | }
|
25 | 29 |
|
|
0 commit comments