@@ -6,39 +6,36 @@ define arm_aapcs_vfpcc <4 x i32> @loads_i32(ptr %A, ptr %B, ptr %C) {
6
6
; CHECK: @ %bb.0: @ %entry
7
7
; CHECK-NEXT: .save {r4, r5, r6, lr}
8
8
; CHECK-NEXT: push {r4, r5, r6, lr}
9
- ; CHECK-NEXT: .vsave {d8, d9}
10
- ; CHECK-NEXT: vpush {d8, d9}
11
- ; CHECK-NEXT: vldrw.u32 q1, [r1]
12
- ; CHECK-NEXT: vmov.i64 q2, #0xffffffff
13
- ; CHECK-NEXT: vmov.f32 s0, s6
14
- ; CHECK-NEXT: vmov.f32 s2, s7
15
- ; CHECK-NEXT: vand q0, q0, q2
16
- ; CHECK-NEXT: vmov.f32 s6, s5
17
- ; CHECK-NEXT: vmov r4, r5, d0
18
- ; CHECK-NEXT: vmov r3, r1, d1
9
+ ; CHECK-NEXT: vldrw.u32 q2, [r1]
10
+ ; CHECK-NEXT: vmov.i64 q1, #0xffffffff
11
+ ; CHECK-NEXT: vmov.f32 s0, s10
12
+ ; CHECK-NEXT: vmov.f32 s2, s11
13
+ ; CHECK-NEXT: vand q0, q0, q1
14
+ ; CHECK-NEXT: vmov.f32 s10, s9
15
+ ; CHECK-NEXT: vmov r3, r4, d0
16
+ ; CHECK-NEXT: vand q2, q2, q1
17
+ ; CHECK-NEXT: vmov r5, r1, d1
19
18
; CHECK-NEXT: vldrw.u32 q0, [r0]
19
+ ; CHECK-NEXT: vldrw.u32 q1, [r2]
20
+ ; CHECK-NEXT: vmov lr, r12, d5
20
21
; CHECK-NEXT: vmov.f32 s12, s2
21
22
; CHECK-NEXT: vmov.f32 s2, s3
22
23
; CHECK-NEXT: vmov r0, s12
23
- ; CHECK-NEXT: vand q3, q1, q2
24
- ; CHECK-NEXT: vldrw.u32 q1, [r2]
25
- ; CHECK-NEXT: vmov lr, r12, d7
26
- ; CHECK-NEXT: vmov.f32 s16, s6
27
- ; CHECK-NEXT: vmov.f32 s18, s7
28
- ; CHECK-NEXT: vand q2, q4, q2
24
+ ; CHECK-NEXT: vmov.f32 s12, s6
25
+ ; CHECK-NEXT: vmov.f32 s6, s7
29
26
; CHECK-NEXT: asrs r2, r0, #31
30
- ; CHECK-NEXT: adds r0, r0, r4
31
- ; CHECK-NEXT: adcs r5, r2
32
- ; CHECK-NEXT: vmov r2, s8
33
- ; CHECK-NEXT: asrl r0, r5, r2
27
+ ; CHECK-NEXT: adds r0, r0, r3
28
+ ; CHECK-NEXT: adc.w r3, r2, r4
29
+ ; CHECK-NEXT: vmov r2, s12
30
+ ; CHECK-NEXT: asrl r0, r3, r2
34
31
; CHECK-NEXT: vmov r2, s2
35
32
; CHECK-NEXT: vmov.f32 s2, s1
36
- ; CHECK-NEXT: asrs r4, r2, #31
37
- ; CHECK-NEXT: adds r2, r2, r3
38
- ; CHECK-NEXT: adcs r1, r4
39
- ; CHECK-NEXT: vmov r3, s10
33
+ ; CHECK-NEXT: asrs r3, r2, #31
34
+ ; CHECK-NEXT: adds r2, r2, r5
35
+ ; CHECK-NEXT: adcs r1, r3
36
+ ; CHECK-NEXT: vmov r3, s6
40
37
; CHECK-NEXT: asrl r2, r1, r3
41
- ; CHECK-NEXT: vmov r4, r5, d6
38
+ ; CHECK-NEXT: vmov r4, r5, d4
42
39
; CHECK-NEXT: vmov r1, s2
43
40
; CHECK-NEXT: vmov.f32 s2, s5
44
41
; CHECK-NEXT: adds.w r6, r1, lr
@@ -54,7 +51,6 @@ define arm_aapcs_vfpcc <4 x i32> @loads_i32(ptr %A, ptr %B, ptr %C) {
54
51
; CHECK-NEXT: asrl r4, r1, r3
55
52
; CHECK-NEXT: vmov q0[2], q0[0], r4, r0
56
53
; CHECK-NEXT: vmov q0[3], q0[1], r6, r2
57
- ; CHECK-NEXT: vpop {d8, d9}
58
54
; CHECK-NEXT: pop {r4, r5, r6, pc}
59
55
entry:
60
56
%a = load <4 x i32 >, ptr %A , align 4
@@ -138,62 +134,58 @@ entry:
138
134
define arm_aapcs_vfpcc void @load_store_i32 (ptr %A , ptr %B , ptr %C , ptr %D ) {
139
135
; CHECK-LABEL: load_store_i32:
140
136
; CHECK: @ %bb.0: @ %entry
141
- ; CHECK-NEXT: .save {r4, r5, r6, r7, lr}
142
- ; CHECK-NEXT: push {r4, r5, r6, r7, lr}
143
- ; CHECK-NEXT: .pad #4
144
- ; CHECK-NEXT: sub sp, #4
145
- ; CHECK-NEXT: .vsave {d8, d9, d10, d11}
146
- ; CHECK-NEXT: vpush {d8, d9, d10, d11}
147
- ; CHECK-NEXT: vldrw.u32 q0, [r1]
148
- ; CHECK-NEXT: vmov.i64 q4, #0xffffffff
149
- ; CHECK-NEXT: vmov.f32 s4, s2
150
- ; CHECK-NEXT: vmov.f32 s2, s1
151
- ; CHECK-NEXT: vmov.f32 s6, s3
152
- ; CHECK-NEXT: vand q2, q0, q4
137
+ ; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr}
138
+ ; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr}
139
+ ; CHECK-NEXT: .vsave {d8}
140
+ ; CHECK-NEXT: vpush {d8}
141
+ ; CHECK-NEXT: vldrw.u32 q2, [r1]
142
+ ; CHECK-NEXT: vmov.i64 q0, #0xffffffff
143
+ ; CHECK-NEXT: vmov.f32 s4, s10
144
+ ; CHECK-NEXT: vmov.f32 s6, s11
145
+ ; CHECK-NEXT: vmov.f32 s10, s9
146
+ ; CHECK-NEXT: vand q1, q1, q0
147
+ ; CHECK-NEXT: vand q2, q2, q0
153
148
; CHECK-NEXT: vldrw.u32 q0, [r0]
154
- ; CHECK-NEXT: vand q1, q1, q4
155
- ; CHECK-NEXT: vmov r5, r1, d3
149
+ ; CHECK-NEXT: vmov r6, r4, d3
156
150
; CHECK-NEXT: vmov.f32 s12, s2
157
151
; CHECK-NEXT: vmov.f32 s2, s3
158
- ; CHECK-NEXT: vmov r0, r12, d2
152
+ ; CHECK-NEXT: vmov lr, r12, d2
159
153
; CHECK-NEXT: vldrw.u32 q1, [r2]
160
- ; CHECK-NEXT: vmov r4, lr, d5
161
- ; CHECK-NEXT: vmov.f32 s20, s6
162
- ; CHECK-NEXT: vmov.f32 s6, s1
163
- ; CHECK-NEXT: vmov.f32 s22, s7
164
- ; CHECK-NEXT: vand q4, q5, q4
165
- ; CHECK-NEXT: vmov r6, s2
154
+ ; CHECK-NEXT: vmov r5, r1, d5
155
+ ; CHECK-NEXT: vmov.f32 s16, s6
156
+ ; CHECK-NEXT: vmov.f32 s6, s7
157
+ ; CHECK-NEXT: vmov.f32 s10, s1
158
+ ; CHECK-NEXT: vmov r0, s2
166
159
; CHECK-NEXT: vmov.f32 s2, s5
167
- ; CHECK-NEXT: adds r2, r6, r5
168
- ; CHECK-NEXT: vmov r5, s18
169
- ; CHECK-NEXT: asr.w r7, r6, #31
170
- ; CHECK-NEXT: adcs r1, r7
171
- ; CHECK-NEXT: asrl r2, r1, r5
172
- ; CHECK-NEXT: vmov r7, s2
173
- ; CHECK-NEXT: vmov r1, s6
174
- ; CHECK-NEXT: adds r4, r4, r1
175
- ; CHECK-NEXT: asr.w r5, r1, #31
176
- ; CHECK-NEXT: adc.w r1, r5, lr
177
- ; CHECK-NEXT: asrl r4, r1, r7
178
- ; CHECK-NEXT: vmov r6, r5, d4
160
+ ; CHECK-NEXT: adds.w r8, r0, r6
161
+ ; CHECK-NEXT: asr.w r2, r0, #31
162
+ ; CHECK-NEXT: adc.w r7, r2, r4
163
+ ; CHECK-NEXT: vmov r2, s6
164
+ ; CHECK-NEXT: asrl r8, r7, r2
165
+ ; CHECK-NEXT: vmov r2, s10
166
+ ; CHECK-NEXT: asrs r4, r2, #31
167
+ ; CHECK-NEXT: adds r2, r2, r5
168
+ ; CHECK-NEXT: adcs r1, r4
169
+ ; CHECK-NEXT: vmov r4, s2
170
+ ; CHECK-NEXT: asrl r2, r1, r4
171
+ ; CHECK-NEXT: vmov r5, r7, d4
179
172
; CHECK-NEXT: vmov r1, s12
180
- ; CHECK-NEXT: adds r0, r0, r1
181
- ; CHECK-NEXT: asr.w r7, r1, #31
182
- ; CHECK-NEXT: adc.w r1, r7, r12
183
- ; CHECK-NEXT: vmov r7, s16
184
- ; CHECK-NEXT: asrl r0, r1, r7
173
+ ; CHECK-NEXT: adds.w r6, r1, lr
174
+ ; CHECK-NEXT: asr.w r4, r1, #31
175
+ ; CHECK-NEXT: adc.w r1, r4, r12
176
+ ; CHECK-NEXT: vmov r4, s16
177
+ ; CHECK-NEXT: asrl r6, r1, r4
185
178
; CHECK-NEXT: vmov r1, s0
186
- ; CHECK-NEXT: adds r6, r6, r1
187
- ; CHECK-NEXT: asr.w r7, r1, #31
188
- ; CHECK-NEXT: adc.w r1, r7, r5
179
+ ; CHECK-NEXT: adds r0, r1, r5
180
+ ; CHECK-NEXT: asr.w r4, r1, #31
181
+ ; CHECK-NEXT: adc.w r1, r4, r7
189
182
; CHECK-NEXT: vmov r7, s4
190
- ; CHECK-NEXT: asrl r6, r1, r7
191
- ; CHECK-NEXT: vmov q0[2], q0[0], r6, r0
192
- ; CHECK-NEXT: vmov q0[3], q0[1], r4, r2
183
+ ; CHECK-NEXT: asrl r0, r1, r7
184
+ ; CHECK-NEXT: vmov q0[2], q0[0], r0, r6
185
+ ; CHECK-NEXT: vmov q0[3], q0[1], r2, r8
193
186
; CHECK-NEXT: vstrw.32 q0, [r3]
194
- ; CHECK-NEXT: vpop {d8, d9, d10, d11}
195
- ; CHECK-NEXT: add sp, #4
196
- ; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
187
+ ; CHECK-NEXT: vpop {d8}
188
+ ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc}
197
189
entry:
198
190
%a = load <4 x i32 >, ptr %A , align 4
199
191
%b = load <4 x i32 >, ptr %B , align 4
0 commit comments