|
4 | 4 | define arm_aapcs_vfpcc i16 @reduce_v16i16_shift_mul(<16 x i8> %s0, <16 x i8> %s1) {
|
5 | 5 | ; CHECK-LABEL: reduce_v16i16_shift_mul:
|
6 | 6 | ; CHECK: @ %bb.0: @ %entry
|
7 |
| -; CHECK-NEXT: .pad #32 |
8 |
| -; CHECK-NEXT: sub sp, #32 |
9 |
| -; CHECK-NEXT: add r0, sp, #16 |
10 |
| -; CHECK-NEXT: mov r1, sp |
11 |
| -; CHECK-NEXT: vstrw.32 q1, [r0] |
12 |
| -; CHECK-NEXT: vstrw.32 q0, [r1] |
13 |
| -; CHECK-NEXT: vldrb.u16 q0, [r0, #8] |
14 |
| -; CHECK-NEXT: vldrb.u16 q1, [r1, #8] |
15 |
| -; CHECK-NEXT: vldrb.u16 q2, [r1] |
16 |
| -; CHECK-NEXT: vmul.i16 q0, q1, q0 |
17 |
| -; CHECK-NEXT: vldrb.u16 q1, [r0] |
| 7 | +; CHECK-NEXT: vmullt.u8 q2, q0, q1 |
| 8 | +; CHECK-NEXT: vmullb.u8 q0, q0, q1 |
| 9 | +; CHECK-NEXT: vshr.s16 q2, q2, #14 |
18 | 10 | ; CHECK-NEXT: vshr.s16 q0, q0, #14
|
19 |
| -; CHECK-NEXT: vmul.i16 q1, q2, q1 |
20 |
| -; CHECK-NEXT: vaddv.u16 r0, q0 |
21 |
| -; CHECK-NEXT: vshr.s16 q1, q1, #14 |
22 |
| -; CHECK-NEXT: vaddva.u16 r0, q1 |
23 |
| -; CHECK-NEXT: add sp, #32 |
| 11 | +; CHECK-NEXT: vaddv.u16 r0, q2 |
| 12 | +; CHECK-NEXT: vaddva.u16 r0, q0 |
24 | 13 | ; CHECK-NEXT: bx lr
|
25 | 14 | entry:
|
26 | 15 | %s0s = zext <16 x i8> %s0 to <16 x i16>
|
@@ -50,23 +39,16 @@ entry:
|
50 | 39 | define arm_aapcs_vfpcc i16 @reduce_v16i16_shift_sub(<16 x i8> %s0, <16 x i8> %s1) {
|
51 | 40 | ; CHECK-LABEL: reduce_v16i16_shift_sub:
|
52 | 41 | ; CHECK: @ %bb.0: @ %entry
|
53 |
| -; CHECK-NEXT: .pad #32 |
54 |
| -; CHECK-NEXT: sub sp, #32 |
55 |
| -; CHECK-NEXT: add r0, sp, #16 |
56 |
| -; CHECK-NEXT: mov r1, sp |
57 |
| -; CHECK-NEXT: vstrw.32 q1, [r0] |
58 |
| -; CHECK-NEXT: vstrw.32 q0, [r1] |
59 |
| -; CHECK-NEXT: vldrb.u16 q0, [r0, #8] |
60 |
| -; CHECK-NEXT: vldrb.u16 q1, [r1, #8] |
61 |
| -; CHECK-NEXT: vldrb.u16 q2, [r1] |
62 |
| -; CHECK-NEXT: vsub.i16 q0, q1, q0 |
63 |
| -; CHECK-NEXT: vldrb.u16 q1, [r0] |
| 42 | +; CHECK-NEXT: vmovlt.u8 q2, q1 |
| 43 | +; CHECK-NEXT: vmovlt.u8 q3, q0 |
| 44 | +; CHECK-NEXT: vsub.i16 q2, q3, q2 |
| 45 | +; CHECK-NEXT: vmovlb.u8 q1, q1 |
| 46 | +; CHECK-NEXT: vmovlb.u8 q0, q0 |
| 47 | +; CHECK-NEXT: vshr.s16 q2, q2, #14 |
| 48 | +; CHECK-NEXT: vsub.i16 q0, q0, q1 |
| 49 | +; CHECK-NEXT: vaddv.u16 r0, q2 |
64 | 50 | ; CHECK-NEXT: vshr.s16 q0, q0, #14
|
65 |
| -; CHECK-NEXT: vsub.i16 q1, q2, q1 |
66 |
| -; CHECK-NEXT: vaddv.u16 r0, q0 |
67 |
| -; CHECK-NEXT: vshr.s16 q1, q1, #14 |
68 |
| -; CHECK-NEXT: vaddva.u16 r0, q1 |
69 |
| -; CHECK-NEXT: add sp, #32 |
| 51 | +; CHECK-NEXT: vaddva.u16 r0, q0 |
70 | 52 | ; CHECK-NEXT: bx lr
|
71 | 53 | entry:
|
72 | 54 | %s0s = zext <16 x i8> %s0 to <16 x i16>
|
@@ -190,17 +172,15 @@ define void @correlate(ptr nocapture noundef readonly %ID, ptr nocapture noundef
|
190 | 172 | ; CHECK-NEXT: .LBB4_8: @ %vector.body
|
191 | 173 | ; CHECK-NEXT: @ Parent Loop BB4_4 Depth=1
|
192 | 174 | ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
|
193 |
| -; CHECK-NEXT: vldrh.s32 q2, [r5], #16 |
194 |
| -; CHECK-NEXT: vldrh.s32 q1, [r4], #16 |
| 175 | +; CHECK-NEXT: vldrh.u16 q1, [r4], #16 |
| 176 | +; CHECK-NEXT: vldrh.u16 q2, [r5], #16 |
195 | 177 | ; CHECK-NEXT: rsb.w r1, r12, #0
|
196 |
| -; CHECK-NEXT: vmul.i32 q1, q2, q1 |
197 |
| -; CHECK-NEXT: vldrh.s32 q2, [r4, #-8] |
198 |
| -; CHECK-NEXT: vldrh.s32 q3, [r5, #-8] |
| 178 | +; CHECK-NEXT: vmullb.s16 q3, q2, q1 |
| 179 | +; CHECK-NEXT: vmullt.s16 q1, q2, q1 |
| 180 | +; CHECK-NEXT: vshl.s32 q3, r1 |
199 | 181 | ; CHECK-NEXT: vshl.s32 q1, r1
|
| 182 | +; CHECK-NEXT: vaddva.u32 r6, q3 |
200 | 183 | ; CHECK-NEXT: vaddva.u32 r6, q1
|
201 |
| -; CHECK-NEXT: vmul.i32 q2, q3, q2 |
202 |
| -; CHECK-NEXT: vshl.s32 q2, r1 |
203 |
| -; CHECK-NEXT: vaddva.u32 r6, q2 |
204 | 184 | ; CHECK-NEXT: le lr, .LBB4_8
|
205 | 185 | ; CHECK-NEXT: @ %bb.9: @ %middle.block
|
206 | 186 | ; CHECK-NEXT: @ in Loop: Header=BB4_4 Depth=1
|
|
0 commit comments