Skip to content

Commit 047a0b6

Browse files
committed
[ARM] Disable MVE fptosi and friends
This prevents us from trying to convert an i1 predicate vector to a float, or vice-versa. Better patterns are possible, which will follow in a subsequent commit. For now we just expand them. Differential Revision: https://reviews.llvm.org/D65066 llvm-svn: 366931
1 parent c19c307 commit 047a0b6

File tree

2 files changed

+165
-50
lines changed

2 files changed

+165
-50
lines changed

llvm/lib/Target/ARM/ARMISelLowering.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -349,6 +349,10 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
349349
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
350350
setOperationAction(ISD::SETCC, VT, Custom);
351351
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
352+
setOperationAction(ISD::FP_TO_SINT, VT, Expand);
353+
setOperationAction(ISD::FP_TO_UINT, VT, Expand);
354+
setOperationAction(ISD::SINT_TO_FP, VT, Expand);
355+
setOperationAction(ISD::UINT_TO_FP, VT, Expand);
352356
}
353357
}
354358

Lines changed: 161 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s
2+
; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s
33

44
define arm_aapcs_vfpcc <4 x i32> @sext_v4i1_v4i32(<4 x i32> %src) {
55
; CHECK-LABEL: sext_v4i1_v4i32:
66
; CHECK: @ %bb.0: @ %entry
7+
; CHECK-NEXT: vmov.i32 q1, #0x0
8+
; CHECK-NEXT: vmov.i8 q2, #0xff
79
; CHECK-NEXT: vcmp.s32 gt, q0, zr
8-
; CHECK-NEXT: vmov.i32 q0, #0x0
9-
; CHECK-NEXT: vmov.i8 q1, #0xff
10-
; CHECK-NEXT: vpsel q0, q1, q0
10+
; CHECK-NEXT: vpsel q0, q2, q1
1111
; CHECK-NEXT: bx lr
1212
entry:
1313
%c = icmp sgt <4 x i32> %src, zeroinitializer
@@ -18,10 +18,10 @@ entry:
1818
define arm_aapcs_vfpcc <8 x i16> @sext_v8i1_v8i16(<8 x i16> %src) {
1919
; CHECK-LABEL: sext_v8i1_v8i16:
2020
; CHECK: @ %bb.0: @ %entry
21+
; CHECK-NEXT: vmov.i16 q1, #0x0
22+
; CHECK-NEXT: vmov.i8 q2, #0xff
2123
; CHECK-NEXT: vcmp.s16 gt, q0, zr
22-
; CHECK-NEXT: vmov.i16 q0, #0x0
23-
; CHECK-NEXT: vmov.i8 q1, #0xff
24-
; CHECK-NEXT: vpsel q0, q1, q0
24+
; CHECK-NEXT: vpsel q0, q2, q1
2525
; CHECK-NEXT: bx lr
2626
entry:
2727
%c = icmp sgt <8 x i16> %src, zeroinitializer
@@ -32,10 +32,10 @@ entry:
3232
define arm_aapcs_vfpcc <16 x i8> @sext_v16i1_v16i8(<16 x i8> %src) {
3333
; CHECK-LABEL: sext_v16i1_v16i8:
3434
; CHECK: @ %bb.0: @ %entry
35+
; CHECK-NEXT: vmov.i8 q1, #0x0
36+
; CHECK-NEXT: vmov.i8 q2, #0xff
3537
; CHECK-NEXT: vcmp.s8 gt, q0, zr
36-
; CHECK-NEXT: vmov.i8 q0, #0x0
37-
; CHECK-NEXT: vmov.i8 q1, #0xff
38-
; CHECK-NEXT: vpsel q0, q1, q0
38+
; CHECK-NEXT: vpsel q0, q2, q1
3939
; CHECK-NEXT: bx lr
4040
entry:
4141
%c = icmp sgt <16 x i8> %src, zeroinitializer
@@ -46,31 +46,30 @@ entry:
4646
define arm_aapcs_vfpcc <2 x i64> @sext_v2i1_v2i64(<2 x i64> %src) {
4747
; CHECK-LABEL: sext_v2i1_v2i64:
4848
; CHECK: @ %bb.0: @ %entry
49-
; CHECK-NEXT: vmov r1, s0
49+
; CHECK-NEXT: vmov r1, s2
5050
; CHECK-NEXT: movs r2, #0
51-
; CHECK-NEXT: vmov r0, s1
51+
; CHECK-NEXT: vmov r0, s3
52+
; CHECK-NEXT: vmov r3, s0
5253
; CHECK-NEXT: rsbs r1, r1, #0
54+
; CHECK-NEXT: vmov r1, s1
5355
; CHECK-NEXT: sbcs.w r0, r2, r0
54-
; CHECK-NEXT: vmov r1, s2
5556
; CHECK-NEXT: mov.w r0, #0
5657
; CHECK-NEXT: it lt
5758
; CHECK-NEXT: movlt r0, #1
5859
; CHECK-NEXT: cmp r0, #0
5960
; CHECK-NEXT: it ne
6061
; CHECK-NEXT: movne.w r0, #-1
61-
; CHECK-NEXT: vmov.32 q1[0], r0
62-
; CHECK-NEXT: vmov.32 q1[1], r0
63-
; CHECK-NEXT: vmov r0, s3
64-
; CHECK-NEXT: rsbs r1, r1, #0
65-
; CHECK-NEXT: sbcs.w r0, r2, r0
62+
; CHECK-NEXT: rsbs r3, r3, #0
63+
; CHECK-NEXT: sbcs.w r1, r2, r1
6664
; CHECK-NEXT: it lt
6765
; CHECK-NEXT: movlt r2, #1
6866
; CHECK-NEXT: cmp r2, #0
6967
; CHECK-NEXT: it ne
7068
; CHECK-NEXT: movne.w r2, #-1
71-
; CHECK-NEXT: vmov.32 q1[2], r2
72-
; CHECK-NEXT: vmov.32 q1[3], r2
73-
; CHECK-NEXT: vmov q0, q1
69+
; CHECK-NEXT: vmov.32 q0[0], r2
70+
; CHECK-NEXT: vmov.32 q0[1], r2
71+
; CHECK-NEXT: vmov.32 q0[2], r0
72+
; CHECK-NEXT: vmov.32 q0[3], r0
7473
; CHECK-NEXT: bx lr
7574
entry:
7675
%c = icmp sgt <2 x i64> %src, zeroinitializer
@@ -82,10 +81,10 @@ entry:
8281
define arm_aapcs_vfpcc <4 x i32> @zext_v4i1_v4i32(<4 x i32> %src) {
8382
; CHECK-LABEL: zext_v4i1_v4i32:
8483
; CHECK: @ %bb.0: @ %entry
84+
; CHECK-NEXT: vmov.i32 q1, #0x0
85+
; CHECK-NEXT: vmov.i32 q2, #0x1
8586
; CHECK-NEXT: vcmp.s32 gt, q0, zr
86-
; CHECK-NEXT: vmov.i32 q0, #0x0
87-
; CHECK-NEXT: vmov.i32 q1, #0x1
88-
; CHECK-NEXT: vpsel q0, q1, q0
87+
; CHECK-NEXT: vpsel q0, q2, q1
8988
; CHECK-NEXT: bx lr
9089
entry:
9190
%c = icmp sgt <4 x i32> %src, zeroinitializer
@@ -96,10 +95,10 @@ entry:
9695
define arm_aapcs_vfpcc <8 x i16> @zext_v8i1_v8i16(<8 x i16> %src) {
9796
; CHECK-LABEL: zext_v8i1_v8i16:
9897
; CHECK: @ %bb.0: @ %entry
98+
; CHECK-NEXT: vmov.i16 q1, #0x0
99+
; CHECK-NEXT: vmov.i16 q2, #0x1
99100
; CHECK-NEXT: vcmp.s16 gt, q0, zr
100-
; CHECK-NEXT: vmov.i16 q0, #0x0
101-
; CHECK-NEXT: vmov.i16 q1, #0x1
102-
; CHECK-NEXT: vpsel q0, q1, q0
101+
; CHECK-NEXT: vpsel q0, q2, q1
103102
; CHECK-NEXT: bx lr
104103
entry:
105104
%c = icmp sgt <8 x i16> %src, zeroinitializer
@@ -110,10 +109,10 @@ entry:
110109
define arm_aapcs_vfpcc <16 x i8> @zext_v16i1_v16i8(<16 x i8> %src) {
111110
; CHECK-LABEL: zext_v16i1_v16i8:
112111
; CHECK: @ %bb.0: @ %entry
112+
; CHECK-NEXT: vmov.i8 q1, #0x0
113+
; CHECK-NEXT: vmov.i8 q2, #0x1
113114
; CHECK-NEXT: vcmp.s8 gt, q0, zr
114-
; CHECK-NEXT: vmov.i8 q0, #0x0
115-
; CHECK-NEXT: vmov.i8 q1, #0x1
116-
; CHECK-NEXT: vpsel q0, q1, q0
115+
; CHECK-NEXT: vpsel q0, q2, q1
117116
; CHECK-NEXT: bx lr
118117
entry:
119118
%c = icmp sgt <16 x i8> %src, zeroinitializer
@@ -124,31 +123,31 @@ entry:
124123
define arm_aapcs_vfpcc <2 x i64> @zext_v2i1_v2i64(<2 x i64> %src) {
125124
; CHECK-LABEL: zext_v2i1_v2i64:
126125
; CHECK: @ %bb.0: @ %entry
127-
; CHECK-NEXT: vmov r1, s0
128-
; CHECK-NEXT: movs r2, #0
129-
; CHECK-NEXT: vmov r0, s1
130-
; CHECK-NEXT: rsbs r1, r1, #0
131-
; CHECK-NEXT: sbcs.w r0, r2, r0
132-
; CHECK-NEXT: vmov r1, s2
133-
; CHECK-NEXT: mov.w r0, #0
126+
; CHECK-NEXT: vmov r2, s2
127+
; CHECK-NEXT: adr r1, .LCPI7_0
128+
; CHECK-NEXT: vldrw.u32 q1, [r1]
129+
; CHECK-NEXT: vmov r1, s3
130+
; CHECK-NEXT: vmov r3, s0
131+
; CHECK-NEXT: movs r0, #0
132+
; CHECK-NEXT: rsbs r2, r2, #0
133+
; CHECK-NEXT: vmov r2, s1
134+
; CHECK-NEXT: sbcs.w r1, r0, r1
135+
; CHECK-NEXT: mov.w r1, #0
136+
; CHECK-NEXT: it lt
137+
; CHECK-NEXT: movlt r1, #1
138+
; CHECK-NEXT: cmp r1, #0
139+
; CHECK-NEXT: it ne
140+
; CHECK-NEXT: movne.w r1, #-1
141+
; CHECK-NEXT: rsbs r3, r3, #0
142+
; CHECK-NEXT: sbcs.w r2, r0, r2
134143
; CHECK-NEXT: it lt
135144
; CHECK-NEXT: movlt r0, #1
136145
; CHECK-NEXT: cmp r0, #0
137146
; CHECK-NEXT: it ne
138147
; CHECK-NEXT: movne.w r0, #-1
139-
; CHECK-NEXT: vmov.32 q1[0], r0
140-
; CHECK-NEXT: vmov r0, s3
141-
; CHECK-NEXT: rsbs r1, r1, #0
142-
; CHECK-NEXT: sbcs.w r0, r2, r0
143-
; CHECK-NEXT: it lt
144-
; CHECK-NEXT: movlt r2, #1
145-
; CHECK-NEXT: adr r0, .LCPI7_0
146-
; CHECK-NEXT: cmp r2, #0
147-
; CHECK-NEXT: vldrw.u32 q0, [r0]
148-
; CHECK-NEXT: it ne
149-
; CHECK-NEXT: movne.w r2, #-1
150-
; CHECK-NEXT: vmov.32 q1[2], r2
151-
; CHECK-NEXT: vand q0, q1, q0
148+
; CHECK-NEXT: vmov.32 q0[0], r0
149+
; CHECK-NEXT: vmov.32 q0[2], r1
150+
; CHECK-NEXT: vand q0, q0, q1
152151
; CHECK-NEXT: bx lr
153152
; CHECK-NEXT: .p2align 4
154153
; CHECK-NEXT: @ %bb.1:
@@ -162,3 +161,115 @@ entry:
162161
%0 = zext <2 x i1> %c to <2 x i64>
163162
ret <2 x i64> %0
164163
}
164+
165+
166+
define arm_aapcs_vfpcc <4 x float> @uitofp_v4i1_v4f32(<4 x i32> %src) {
167+
; CHECK-LABEL: uitofp_v4i1_v4f32:
168+
; CHECK: @ %bb.0: @ %entry
169+
; CHECK-NEXT: vcmp.s32 gt, q0, zr
170+
; CHECK-NEXT: vmrs r0, p0
171+
; CHECK-NEXT: ubfx r1, r0, #8, #1
172+
; CHECK-NEXT: ubfx r2, r0, #12, #1
173+
; CHECK-NEXT: vmov s0, r2
174+
; CHECK-NEXT: vmov s4, r1
175+
; CHECK-NEXT: vcvt.f32.u32 s3, s0
176+
; CHECK-NEXT: ubfx r2, r0, #4, #1
177+
; CHECK-NEXT: vcvt.f32.u32 s2, s4
178+
; CHECK-NEXT: and r0, r0, #1
179+
; CHECK-NEXT: vmov s4, r2
180+
; CHECK-NEXT: vcvt.f32.u32 s1, s4
181+
; CHECK-NEXT: vmov s4, r0
182+
; CHECK-NEXT: vcvt.f32.u32 s0, s4
183+
; CHECK-NEXT: bx lr
184+
entry:
185+
%c = icmp sgt <4 x i32> %src, zeroinitializer
186+
%0 = uitofp <4 x i1> %c to <4 x float>
187+
ret <4 x float> %0
188+
}
189+
190+
define arm_aapcs_vfpcc <4 x float> @sitofp_v4i1_v4f32(<4 x i32> %src) {
191+
; CHECK-LABEL: sitofp_v4i1_v4f32:
192+
; CHECK: @ %bb.0: @ %entry
193+
; CHECK-NEXT: vcmp.s32 gt, q0, zr
194+
; CHECK-NEXT: vmrs r0, p0
195+
; CHECK-NEXT: and r1, r0, #1
196+
; CHECK-NEXT: ubfx r2, r0, #8, #1
197+
; CHECK-NEXT: ubfx r3, r0, #4, #1
198+
; CHECK-NEXT: ubfx r0, r0, #12, #1
199+
; CHECK-NEXT: rsbs r2, r2, #0
200+
; CHECK-NEXT: rsbs r0, r0, #0
201+
; CHECK-NEXT: vmov s4, r2
202+
; CHECK-NEXT: vmov s0, r0
203+
; CHECK-NEXT: rsbs r0, r3, #0
204+
; CHECK-NEXT: vcvt.f32.s32 s3, s0
205+
; CHECK-NEXT: vcvt.f32.s32 s2, s4
206+
; CHECK-NEXT: vmov s4, r0
207+
; CHECK-NEXT: rsbs r0, r1, #0
208+
; CHECK-NEXT: vcvt.f32.s32 s1, s4
209+
; CHECK-NEXT: vmov s4, r0
210+
; CHECK-NEXT: vcvt.f32.s32 s0, s4
211+
; CHECK-NEXT: bx lr
212+
entry:
213+
%c = icmp sgt <4 x i32> %src, zeroinitializer
214+
%0 = sitofp <4 x i1> %c to <4 x float>
215+
ret <4 x float> %0
216+
}
217+
218+
define arm_aapcs_vfpcc <4 x float> @fptoui_v4i1_v4f32(<4 x float> %src) {
219+
; CHECK-LABEL: fptoui_v4i1_v4f32:
220+
; CHECK: @ %bb.0: @ %entry
221+
; CHECK-NEXT: vcvt.s32.f32 s4, s0
222+
; CHECK-NEXT: movs r0, #0
223+
; CHECK-NEXT: vmov.f32 q2, #1.000000e+00
224+
; CHECK-NEXT: vmov r1, s4
225+
; CHECK-NEXT: vcvt.s32.f32 s4, s1
226+
; CHECK-NEXT: rsbs r1, r1, #0
227+
; CHECK-NEXT: bfi r0, r1, #0, #4
228+
; CHECK-NEXT: vmov r1, s4
229+
; CHECK-NEXT: vcvt.s32.f32 s4, s2
230+
; CHECK-NEXT: vcvt.s32.f32 s0, s3
231+
; CHECK-NEXT: rsbs r1, r1, #0
232+
; CHECK-NEXT: bfi r0, r1, #4, #4
233+
; CHECK-NEXT: vmov r1, s4
234+
; CHECK-NEXT: vmov.i32 q1, #0x0
235+
; CHECK-NEXT: rsbs r1, r1, #0
236+
; CHECK-NEXT: bfi r0, r1, #8, #4
237+
; CHECK-NEXT: vmov r1, s0
238+
; CHECK-NEXT: rsbs r1, r1, #0
239+
; CHECK-NEXT: bfi r0, r1, #12, #4
240+
; CHECK-NEXT: vmsr p0, r0
241+
; CHECK-NEXT: vpsel q0, q2, q1
242+
; CHECK-NEXT: bx lr
243+
entry:
244+
%0 = fptoui <4 x float> %src to <4 x i1>
245+
%s = select <4 x i1> %0, <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, <4 x float> zeroinitializer
246+
ret <4 x float> %s
247+
}
248+
249+
define arm_aapcs_vfpcc <4 x float> @fptosi_v4i1_v4f32(<4 x float> %src) {
250+
; CHECK-LABEL: fptosi_v4i1_v4f32:
251+
; CHECK: @ %bb.0: @ %entry
252+
; CHECK-NEXT: vcvt.s32.f32 s4, s0
253+
; CHECK-NEXT: movs r0, #0
254+
; CHECK-NEXT: vmov.f32 q2, #1.000000e+00
255+
; CHECK-NEXT: vmov r1, s4
256+
; CHECK-NEXT: vcvt.s32.f32 s4, s1
257+
; CHECK-NEXT: bfi r0, r1, #0, #4
258+
; CHECK-NEXT: vmov r1, s4
259+
; CHECK-NEXT: vcvt.s32.f32 s4, s2
260+
; CHECK-NEXT: bfi r0, r1, #4, #4
261+
; CHECK-NEXT: vcvt.s32.f32 s0, s3
262+
; CHECK-NEXT: vmov r1, s4
263+
; CHECK-NEXT: vmov.i32 q1, #0x0
264+
; CHECK-NEXT: bfi r0, r1, #8, #4
265+
; CHECK-NEXT: vmov r1, s0
266+
; CHECK-NEXT: bfi r0, r1, #12, #4
267+
; CHECK-NEXT: vmsr p0, r0
268+
; CHECK-NEXT: vpsel q0, q2, q1
269+
; CHECK-NEXT: bx lr
270+
entry:
271+
%0 = fptosi <4 x float> %src to <4 x i1>
272+
%s = select <4 x i1> %0, <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, <4 x float> zeroinitializer
273+
ret <4 x float> %s
274+
}
275+

0 commit comments

Comments
 (0)