You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
[ARM] Fix instruction selection for MVE vsbciq intrinsic (#118284)
There were two bugs in the implementation of the MVE vsbciq (subtract
with carry across vector, with initial carry value) intrinsics:
* The VSBCI instruction behaves as if the carry-in is always set, but we
were selecting it when the carry-in is clear.
* The vsbciq intrinsics should generate IR with the carry-in set, but
they were leaving it clear.
These two bugs almost cancelled each other out, but resulted in
incorrect code when the vsbcq intrinsics (with a carry-in) were used
and the carry-in was a compile-time constant.
Copy file name to clipboard. Expand all lines: llvm/test/CodeGen/Thumb2/mve-intrinsics/vadc.ll
+90 -2. Lines changed: 90 additions & 2 deletions
Original file line number
Diff line number
Diff line change
@@ -108,7 +108,7 @@ define arm_aapcs_vfpcc <4 x i32> @test_vsbciq_s32(<4 x i32> %a, <4 x i32> %b, pt
108
108
; CHECK-NEXT: str r1, [r0]
109
109
; CHECK-NEXT: bx lr
110
110
entry:
111
-
%0 = tail call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.v4i32(<4 x i32> %a, <4 x i32> %b, i32 0)
111
+
%0 = tail call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.v4i32(<4 x i32> %a, <4 x i32> %b, i32 536870912)
112
112
%1 = extractvalue { <4 x i32>, i32 } %0, 1
113
113
%2 = lshr i32 %1, 29
114
114
%3 = and i32 %2, 1
@@ -125,6 +125,46 @@ define arm_aapcs_vfpcc <4 x i32> @test_vsbciq_u32(<4 x i32> %a, <4 x i32> %b, pt
125
125
; CHECK-NEXT: ubfx r1, r1, #29, #1
126
126
; CHECK-NEXT: str r1, [r0]
127
127
; CHECK-NEXT: bx lr
128
+
entry:
129
+
%0 = tail call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.v4i32(<4 x i32> %a, <4 x i32> %b, i32 536870912)
130
+
%1 = extractvalue { <4 x i32>, i32 } %0, 1
131
+
%2 = lshr i32 %1, 29
132
+
%3 = and i32 %2, 1
133
+
store i32 %3, ptr %carry_out, align 4
134
+
%4 = extractvalue { <4 x i32>, i32 } %0, 0
135
+
ret <4 x i32> %4
136
+
}
137
+
138
+
define arm_aapcs_vfpcc <4 x i32> @test_vsbcq_s32_carry_in_zero(<4 x i32> %a, <4 x i32> %b, ptr nocapture %carry_out) {
139
+
; CHECK-LABEL: test_vsbcq_s32_carry_in_zero:
140
+
; CHECK: @ %bb.0: @ %entry
141
+
; CHECK-NEXT: movs r1, #0
142
+
; CHECK-NEXT: vmsr fpscr_nzcvqc, r1
143
+
; CHECK-NEXT: vsbc.i32 q0, q0, q1
144
+
; CHECK-NEXT: vmrs r1, fpscr_nzcvqc
145
+
; CHECK-NEXT: ubfx r1, r1, #29, #1
146
+
; CHECK-NEXT: str r1, [r0]
147
+
; CHECK-NEXT: bx lr
148
+
entry:
149
+
%0 = tail call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.v4i32(<4 x i32> %a, <4 x i32> %b, i32 0)
150
+
%1 = extractvalue { <4 x i32>, i32 } %0, 1
151
+
%2 = lshr i32 %1, 29
152
+
%3 = and i32 %2, 1
153
+
store i32 %3, ptr %carry_out, align 4
154
+
%4 = extractvalue { <4 x i32>, i32 } %0, 0
155
+
ret <4 x i32> %4
156
+
}
157
+
158
+
define arm_aapcs_vfpcc <4 x i32> @test_vsbcq_u32_carry_in_zero(<4 x i32> %a, <4 x i32> %b, ptr nocapture %carry_out) {
159
+
; CHECK-LABEL: test_vsbcq_u32_carry_in_zero:
160
+
; CHECK: @ %bb.0: @ %entry
161
+
; CHECK-NEXT: movs r1, #0
162
+
; CHECK-NEXT: vmsr fpscr_nzcvqc, r1
163
+
; CHECK-NEXT: vsbc.i32 q0, q0, q1
164
+
; CHECK-NEXT: vmrs r1, fpscr_nzcvqc
165
+
; CHECK-NEXT: ubfx r1, r1, #29, #1
166
+
; CHECK-NEXT: str r1, [r0]
167
+
; CHECK-NEXT: bx lr
128
168
entry:
129
169
%0 = tail call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.v4i32(<4 x i32> %a, <4 x i32> %b, i32 0)
130
170
%1 = extractvalue { <4 x i32>, i32 } %0, 1
@@ -196,7 +236,7 @@ define arm_aapcs_vfpcc <4 x i32> @test_vsbciq_m_s32(<4 x i32> %inactive, <4 x i3
196
236
entry:
197
237
%0 = zext i16 %p to i32
198
238
%1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
199
-
%2 = tail call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.predicated.v4i32.v4i1(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i32 0, <4 x i1> %1)
239
+
%2 = tail call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.predicated.v4i32.v4i1(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i32 536870912, <4 x i1> %1)
200
240
%3 = extractvalue { <4 x i32>, i32 } %2, 1
201
241
%4 = lshr i32 %3, 29
202
242
%5 = and i32 %4, 1
@@ -215,6 +255,54 @@ define arm_aapcs_vfpcc <4 x i32> @test_vsbciq_m_u32(<4 x i32> %inactive, <4 x i3
215
255
; CHECK-NEXT: ubfx r1, r1, #29, #1
216
256
; CHECK-NEXT: str r1, [r0]
217
257
; CHECK-NEXT: bx lr
258
+
entry:
259
+
%0 = zext i16 %p to i32
260
+
%1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
261
+
%2 = tail call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.predicated.v4i32.v4i1(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i32 536870912, <4 x i1> %1)
262
+
%3 = extractvalue { <4 x i32>, i32 } %2, 1
263
+
%4 = lshr i32 %3, 29
264
+
%5 = and i32 %4, 1
265
+
store i32 %5, ptr %carry_out, align 4
266
+
%6 = extractvalue { <4 x i32>, i32 } %2, 0
267
+
ret <4 x i32> %6
268
+
}
269
+
270
+
define arm_aapcs_vfpcc <4 x i32> @test_vsbcq_m_s32_carry_in_zero(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, ptr nocapture %carry_out, i16 zeroext %p) {
271
+
; CHECK-LABEL: test_vsbcq_m_s32_carry_in_zero:
272
+
; CHECK: @ %bb.0: @ %entry
273
+
; CHECK-NEXT: movs r2, #0
274
+
; CHECK-NEXT: vmsr p0, r1
275
+
; CHECK-NEXT: vmsr fpscr_nzcvqc, r2
276
+
; CHECK-NEXT: vpst
277
+
; CHECK-NEXT: vsbct.i32 q0, q1, q2
278
+
; CHECK-NEXT: vmrs r1, fpscr_nzcvqc
279
+
; CHECK-NEXT: ubfx r1, r1, #29, #1
280
+
; CHECK-NEXT: str r1, [r0]
281
+
; CHECK-NEXT: bx lr
282
+
entry:
283
+
%0 = zext i16 %p to i32
284
+
%1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
285
+
%2 = tail call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.predicated.v4i32.v4i1(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i32 0, <4 x i1> %1)
286
+
%3 = extractvalue { <4 x i32>, i32 } %2, 1
287
+
%4 = lshr i32 %3, 29
288
+
%5 = and i32 %4, 1
289
+
store i32 %5, ptr %carry_out, align 4
290
+
%6 = extractvalue { <4 x i32>, i32 } %2, 0
291
+
ret <4 x i32> %6
292
+
}
293
+
294
+
define arm_aapcs_vfpcc <4 x i32> @test_vsbcq_m_u32_carry_in_zero(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, ptr nocapture %carry_out, i16 zeroext %p) {
295
+
; CHECK-LABEL: test_vsbcq_m_u32_carry_in_zero:
296
+
; CHECK: @ %bb.0: @ %entry
297
+
; CHECK-NEXT: movs r2, #0
298
+
; CHECK-NEXT: vmsr p0, r1
299
+
; CHECK-NEXT: vmsr fpscr_nzcvqc, r2
300
+
; CHECK-NEXT: vpst
301
+
; CHECK-NEXT: vsbct.i32 q0, q1, q2
302
+
; CHECK-NEXT: vmrs r1, fpscr_nzcvqc
303
+
; CHECK-NEXT: ubfx r1, r1, #29, #1
304
+
; CHECK-NEXT: str r1, [r0]
305
+
; CHECK-NEXT: bx lr
218
306
entry:
219
307
%0 = zext i16 %p to i32
220
308
%1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
0 commit comments