@@ -39,6 +39,9 @@ declare float @llvm.vp.reduce.fmin.v4f32(float, <4 x float>, <4 x i1>, i32)
39
39
declare float @llvm.vp.reduce.fmax.v4f32 (float , <4 x float >, <4 x i1 >, i32 )
40
40
declare float @llvm.vp.reduce.fadd.v4f32 (float , <4 x float >, <4 x i1 >, i32 )
41
41
declare float @llvm.vp.reduce.fmul.v4f32 (float , <4 x float >, <4 x i1 >, i32 )
42
+ ; Comparisons
43
+ declare <8 x i1 > @llvm.vp.icmp.v8i32 (<8 x i32 >, <8 x i32 >, metadata , <8 x i1 >, i32 )
44
+ declare <8 x i1 > @llvm.vp.fcmp.v8f32 (<8 x float >, <8 x float >, metadata , <8 x i1 >, i32 )
42
45
43
46
; Fixed vector test function.
44
47
define void @test_vp_int_v8 (<8 x i32 > %i0 , <8 x i32 > %i1 , <8 x i32 > %i2 , <8 x i32 > %f3 , <8 x i1 > %m , i32 %n ) {
@@ -121,6 +124,14 @@ define void @test_vp_reduce_fp_v4(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n
121
124
ret void
122
125
}
123
126
127
; Comparison test function: calls llvm.vp.icmp with the "eq" and "slt"
; predicates and llvm.vp.fcmp with the "oeq" and "ult" predicates on 8-element
; vectors, all under the same mask %m and explicit vector length %n.
; The results %r0-%r3 are intentionally unused; check lines elsewhere in this
; file match these value names verbatim, so do not rename them.
+ define void @test_vp_cmp_v8 (<8 x i32 > %i0 , <8 x i32 > %i1 , <8 x float > %f0 , <8 x float > %f1 , <8 x i1 > %m , i32 %n ) {
128
+ %r0 = call <8 x i1 > @llvm.vp.icmp.v8i32 (<8 x i32 > %i0 , <8 x i32 > %i1 , metadata !"eq" , <8 x i1 > %m , i32 %n )
129
+ %r1 = call <8 x i1 > @llvm.vp.icmp.v8i32 (<8 x i32 > %i0 , <8 x i32 > %i1 , metadata !"slt" , <8 x i1 > %m , i32 %n )
130
+ %r2 = call <8 x i1 > @llvm.vp.fcmp.v8f32 (<8 x float > %f0 , <8 x float > %f1 , metadata !"oeq" , <8 x i1 > %m , i32 %n )
131
+ %r3 = call <8 x i1 > @llvm.vp.fcmp.v8f32 (<8 x float > %f0 , <8 x float > %f1 , metadata !"ult" , <8 x i1 > %m , i32 %n )
132
+ ret void
133
+ }
134
+
124
135
; All VP intrinsics have to be lowered into non-VP ops
125
136
; Convert %evl into %mask for non-speculatable VP intrinsics and emit the
126
137
; instruction+select idiom with a non-VP SIMD instruction.
@@ -233,6 +244,15 @@ define void @test_vp_reduce_fp_v4(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n
233
244
; ALL-CONVERT-NEXT: %{{.+}} = call reassoc float @llvm.vector.reduce.fmul.v4f32(float %f, <4 x float> [[FMUL]])
234
245
; ALL-CONVERT-NEXT: ret void
235
246
247
+ ; Check that comparisons use the correct condition codes
248
+ ; ALL-CONVERT: define void @test_vp_cmp_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x float> %f0, <8 x float> %f1, <8 x i1> %m, i32 %n) {
249
+ ; ALL-CONVERT-NEXT: %{{.+}} = icmp eq <8 x i32> %i0, %i1
250
+ ; ALL-CONVERT-NEXT: %{{.+}} = icmp slt <8 x i32> %i0, %i1
251
+ ; ALL-CONVERT-NEXT: %{{.+}} = fcmp oeq <8 x float> %f0, %f1
252
+ ; ALL-CONVERT-NEXT: %{{.+}} = fcmp ult <8 x float> %f0, %f1
253
+ ; ALL-CONVERT-NEXT: ret void
254
+
255
+
236
256
; All legal - don't transform anything.
237
257
238
258
; LEGAL_LEGAL: define void @test_vp_int_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x i32> %i2, <8 x i32> %f3, <8 x i1> %m, i32 %n) {
@@ -292,6 +312,13 @@ define void @test_vp_reduce_fp_v4(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n
292
312
; LEGAL_LEGAL-NEXT: %r9 = call reassoc float @llvm.vp.reduce.fmul.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
293
313
; LEGAL_LEGAL-NEXT: ret void
294
314
315
+ ; LEGAL_LEGAL: define void @test_vp_cmp_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x float> %f0, <8 x float> %f1, <8 x i1> %m, i32 %n) {
316
+ ; LEGAL_LEGAL-NEXT: %r0 = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %i0, <8 x i32> %i1, metadata !"eq", <8 x i1> %m, i32 %n)
317
+ ; LEGAL_LEGAL-NEXT: %r1 = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %i0, <8 x i32> %i1, metadata !"slt", <8 x i1> %m, i32 %n)
318
+ ; LEGAL_LEGAL-NEXT: %r2 = call <8 x i1> @llvm.vp.fcmp.v8f32(<8 x float> %f0, <8 x float> %f1, metadata !"oeq", <8 x i1> %m, i32 %n)
319
+ ; LEGAL_LEGAL-NEXT: %r3 = call <8 x i1> @llvm.vp.fcmp.v8f32(<8 x float> %f0, <8 x float> %f1, metadata !"ult", <8 x i1> %m, i32 %n)
320
+ ; LEGAL_LEGAL-NEXT: ret void
321
+
295
322
; Drop %evl where possible else fold %evl into %mask (%evl Discard, %mask Legal)
296
323
;
297
324
; There is no caching yet in the ExpandVectorPredication pass and the %evl
@@ -372,6 +399,12 @@ define void @test_vp_reduce_fp_v4(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n
372
399
; DISCARD_LEGAL-NOT: %r9 = call reassoc float @llvm.vp.reduce.fmul.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4)
373
400
; DISCARD_LEGAL: ret void
374
401
402
+ ; DISCARD_LEGAL: define void @test_vp_cmp_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x float> %f0, <8 x float> %f1, <8 x i1> %m, i32 %n) {
403
+ ; DISCARD_LEGAL-NEXT: %r0 = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %i0, <8 x i32> %i1, metadata !"eq", <8 x i1> %m, i32 8)
404
+ ; DISCARD_LEGAL-NEXT: %r1 = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %i0, <8 x i32> %i1, metadata !"slt", <8 x i1> %m, i32 8)
405
+ ; DISCARD_LEGAL-NEXT: %r2 = call <8 x i1> @llvm.vp.fcmp.v8f32(<8 x float> %f0, <8 x float> %f1, metadata !"oeq", <8 x i1> %m, i32 8)
406
+ ; DISCARD_LEGAL-NEXT: %r3 = call <8 x i1> @llvm.vp.fcmp.v8f32(<8 x float> %f0, <8 x float> %f1, metadata !"ult", <8 x i1> %m, i32 8)
407
+
375
408
; Convert %evl into %mask everywhere (%evl Convert, %mask Legal)
376
409
;
377
410
; For the same reasons as in the (%evl Discard, %mask Legal) case only check that..
@@ -441,3 +474,15 @@ define void @test_vp_reduce_fp_v4(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n
441
474
; CONVERT_LEGAL-NOT: %{{.+}} = call float @llvm.vp.reduce.fmul.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4)
442
475
; CONVERT_LEGAL-NOT: %{{.+}} = call reassoc float @llvm.vp.reduce.fmul.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4)
443
476
; CONVERT_LEGAL: ret void
477
+
478
+ ; CONVERT_LEGAL: define void @test_vp_cmp_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x float> %f0, <8 x float> %f1, <8 x i1> %m, i32 %n) {
479
+ ; CONVERT_LEGAL-NEXT: [[NINS:%.+]] = insertelement <8 x i32> poison, i32 %n, i32 0
480
+ ; CONVERT_LEGAL-NEXT: [[NSPLAT:%.+]] = shufflevector <8 x i32> [[NINS]], <8 x i32> poison, <8 x i32> zeroinitializer
481
+ ; CONVERT_LEGAL-NEXT: [[EVLM:%.+]] = icmp ult <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[NSPLAT]]
482
+ ; CONVERT_LEGAL-NEXT: [[NEWM:%.+]] = and <8 x i1> [[EVLM]], %m
483
+ ; CONVERT_LEGAL-NEXT: %{{.+}} = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %i0, <8 x i32> %i1, metadata !"eq", <8 x i1> [[NEWM]], i32 8)
484
+ ; CONVERT_LEGAL-NOT: %{{.+}} = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %i0, <8 x i32> %i1, metadata !"eq", <8 x i1> %m, i32 %n)
485
+ ; CONVERT_LEGAL-NOT: %{{.+}} = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %i0, <8 x i32> %i1, metadata !"slt", <8 x i1> %m, i32 %n)
486
+ ; CONVERT_LEGAL-NOT: %{{.+}} = call <8 x i1> @llvm.vp.fcmp.v8f32(<8 x float> %f0, <8 x float> %f1, metadata !"oeq", <8 x i1> %m, i32 %n)
487
+ ; CONVERT_LEGAL-NOT: %{{.+}} = call <8 x i1> @llvm.vp.fcmp.v8f32(<8 x float> %f0, <8 x float> %f1, metadata !"ult", <8 x i1> %m, i32 %n)
488
+ ; CONVERT_LEGAL: ret void
0 commit comments