Skip to content

Commit 03bf229

Browse files
committed
[ARM] Multi-vector MVE spill test
This is a test from D67169 that can now be added, since the vld2 intrinsics have been committed upstream.
1 parent df3ae1e commit 03bf229

File tree

1 file changed

+103
-0
lines changed

1 file changed

+103
-0
lines changed
Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -O3 -mattr=+mve %s -o - | FileCheck %s
3+
4+
; External callee with no body in this file; spill_multivector calls it
; between its vector loads and its vector stores.
declare void @external_function()
5+
6+
; spill_multivector: keeps five vld2q results (ten <4 x i32> values) live
; across a call to @external_function, then stores all ten back through %p.
; The autogenerated assertions below expect three of the five register pairs
; to be saved with a single 32-byte vstmia before the call and restored with a
; 32-byte vldmia after it, while the other two pairs are loaded into q4-q7 and
; stored from there after the call.
define arm_aapcs_vfpcc void @spill_multivector(<4 x i32>* %p) {
7+
; CHECK-LABEL: spill_multivector:
8+
; CHECK: @ %bb.0: @ %entry
9+
; CHECK-NEXT: .save {r4, r5, r6, lr}
10+
; CHECK-NEXT: push {r4, r5, r6, lr}
11+
; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
12+
; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
13+
; CHECK-NEXT: .pad #112
14+
; CHECK-NEXT: sub sp, #112
15+
; CHECK-NEXT: vld20.32 {q0, q1}, [r0]
16+
; CHECK-NEXT: add.w lr, sp, #64
17+
; CHECK-NEXT: mov r4, r0
18+
; CHECK-NEXT: vld21.32 {q0, q1}, [r0]
19+
; CHECK-NEXT: adds r0, #64
20+
; CHECK-NEXT: vstmia lr, {d0, d1, d2, d3} @ 32-byte Spill
21+
; CHECK-NEXT: add.w lr, sp, #32
22+
; CHECK-NEXT: vld20.32 {q0, q1}, [r0]
23+
; CHECK-NEXT: vld21.32 {q0, q1}, [r0]
24+
; CHECK-NEXT: add.w r0, r4, #128
25+
; CHECK-NEXT: vstmia lr, {d0, d1, d2, d3} @ 32-byte Spill
26+
; CHECK-NEXT: vld20.32 {q0, q1}, [r0]
27+
; CHECK-NEXT: vld21.32 {q0, q1}, [r0]
28+
; CHECK-NEXT: add.w r0, r4, #192
29+
; CHECK-NEXT: vstmia sp, {d0, d1, d2, d3} @ 32-byte Spill
30+
; CHECK-NEXT: vld20.32 {q6, q7}, [r0]
31+
; CHECK-NEXT: vld21.32 {q6, q7}, [r0]
32+
; CHECK-NEXT: add.w r0, r4, #256
33+
; CHECK-NEXT: vld20.32 {q4, q5}, [r0]
34+
; CHECK-NEXT: vld21.32 {q4, q5}, [r0]
35+
; CHECK-NEXT: bl external_function
36+
; CHECK-NEXT: vldmia sp, {d2, d3, d4, d5} @ 32-byte Reload
37+
; CHECK-NEXT: add r0, sp, #32
38+
; CHECK-NEXT: vstrw.32 q2, [r4, #80]
39+
; CHECK-NEXT: vstrw.32 q5, [r4, #144]
40+
; CHECK-NEXT: vstrw.32 q4, [r4, #128]
41+
; CHECK-NEXT: vstrw.32 q7, [r4, #112]
42+
; CHECK-NEXT: vstrw.32 q1, [r4, #64]
43+
; CHECK-NEXT: vldmia r0, {d2, d3, d4, d5} @ 32-byte Reload
44+
; CHECK-NEXT: add r0, sp, #64
45+
; CHECK-NEXT: vstrw.32 q2, [r4, #48]
46+
; CHECK-NEXT: vstrw.32 q6, [r4, #96]
47+
; CHECK-NEXT: vstrw.32 q1, [r4, #32]
48+
; CHECK-NEXT: vldmia r0, {d2, d3, d4, d5} @ 32-byte Reload
49+
; CHECK-NEXT: vstrw.32 q2, [r4, #16]
50+
; CHECK-NEXT: vstrw.32 q1, [r4]
51+
; CHECK-NEXT: add sp, #112
52+
; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
53+
; CHECK-NEXT: pop {r4, r5, r6, pc}
54+
entry:
; Five vld2q calls on addresses 16 i32 elements apart, starting at %p viewed
; as an i32*. Each call returns a pair of <4 x i32> values, so ten vectors are
; defined before the call below.
55+
%ip01 = bitcast <4 x i32>* %p to i32*
56+
%v01 = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vld2q.v4i32.v4i32.p0i32(i32* %ip01)
57+
%ip23 = getelementptr i32, i32* %ip01, i32 16
58+
%v23 = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vld2q.v4i32.v4i32.p0i32(i32* %ip23)
59+
%ip45 = getelementptr i32, i32* %ip23, i32 16
60+
%v45 = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vld2q.v4i32.v4i32.p0i32(i32* %ip45)
61+
%ip67 = getelementptr i32, i32* %ip45, i32 16
62+
%v67 = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vld2q.v4i32.v4i32.p0i32(i32* %ip67)
63+
%ip89 = getelementptr i32, i32* %ip67, i32 16
64+
%v89 = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vld2q.v4i32.v4i32.p0i32(i32* %ip89)
; The call sits between every load above and every use below, so all five
; loaded pairs are live across it.
65+
call void @external_function()
66+
; Unpack each pair and store its two halves to consecutive <4 x i32> slots of
; %p (slot indices 0 through 9), all with align 4.
67+
%v0 = extractvalue { <4 x i32>, <4 x i32> } %v01, 0
68+
%v1 = extractvalue { <4 x i32>, <4 x i32> } %v01, 1
69+
store <4 x i32> %v0, <4 x i32>* %p, align 4
70+
%p1 = getelementptr <4 x i32>, <4 x i32>* %p, i32 1
71+
store <4 x i32> %v1, <4 x i32>* %p1, align 4
72+
73+
%v2 = extractvalue { <4 x i32>, <4 x i32> } %v23, 0
74+
%v3 = extractvalue { <4 x i32>, <4 x i32> } %v23, 1
75+
%p2 = getelementptr <4 x i32>, <4 x i32>* %p, i32 2
76+
store <4 x i32> %v2, <4 x i32>* %p2, align 4
77+
%p3 = getelementptr <4 x i32>, <4 x i32>* %p, i32 3
78+
store <4 x i32> %v3, <4 x i32>* %p3, align 4
79+
80+
%v4 = extractvalue { <4 x i32>, <4 x i32> } %v45, 0
81+
%v5 = extractvalue { <4 x i32>, <4 x i32> } %v45, 1
82+
%p4 = getelementptr <4 x i32>, <4 x i32>* %p, i32 4
83+
store <4 x i32> %v4, <4 x i32>* %p4, align 4
84+
%p5 = getelementptr <4 x i32>, <4 x i32>* %p, i32 5
85+
store <4 x i32> %v5, <4 x i32>* %p5, align 4
86+
87+
%v6 = extractvalue { <4 x i32>, <4 x i32> } %v67, 0
88+
%v7 = extractvalue { <4 x i32>, <4 x i32> } %v67, 1
89+
%p6 = getelementptr <4 x i32>, <4 x i32>* %p, i32 6
90+
store <4 x i32> %v6, <4 x i32>* %p6, align 4
91+
%p7 = getelementptr <4 x i32>, <4 x i32>* %p, i32 7
92+
store <4 x i32> %v7, <4 x i32>* %p7, align 4
93+
94+
%v8 = extractvalue { <4 x i32>, <4 x i32> } %v89, 0
95+
%v9 = extractvalue { <4 x i32>, <4 x i32> } %v89, 1
96+
%p8 = getelementptr <4 x i32>, <4 x i32>* %p, i32 8
97+
store <4 x i32> %v8, <4 x i32>* %p8, align 4
98+
%p9 = getelementptr <4 x i32>, <4 x i32>* %p, i32 9
99+
store <4 x i32> %v9, <4 x i32>* %p9, align 4
100+
ret void
101+
}
102+
103+
; vld2q intrinsic called by spill_multivector: takes an i32* and returns a
; pair of <4 x i32> vectors.
declare { <4 x i32>, <4 x i32> } @llvm.arm.mve.vld2q.v4i32.v4i32.p0i32(i32*)

0 commit comments

Comments
 (0)