@@ -282,6 +282,37 @@ define <2 x i64> @sel_v16i8_sse_reality(ptr nocapture readonly %x, <2 x i64> %y,
282
282
ret <2 x i64 > %rcast
283
283
}
284
284
285
; Test: pblendvb whose mask is the low half of a wider sign-extended compare.
;   - %cmp/%sext build an <8 x i32> mask from `fcmp olt %a, %b`.
;   - %a, %b and the mask are each narrowed to lanes 0-3 with a shufflevector,
;     then bitcast to <16 x i8> to feed @llvm.x86.sse41.pblendvb.
;   - The blend result is bitcast back to <4 x float> and returned.
; The CHECK lines below mirror the input instruction for instruction, i.e. the
; expected output still contains the pblendvb call.
; NOTE(review): presumably a baseline/negative test for folding pblendvb into
; a select -- confirm against the RUN line and the transform under test.
; NOTE(review): parameters %c and %d are not referenced anywhere in the body.
+ define <4 x float > @sel_v16i8_bitcast_shuffle_bitcast_cmp (<8 x float > %a , <8 x float > %b , <8 x float > %c , <8 x float > %d ) {
286
+ ; CHECK-LABEL: @sel_v16i8_bitcast_shuffle_bitcast_cmp(
287
+ ; CHECK-NEXT: [[CMP:%.*]] = fcmp olt <8 x float> [[A:%.*]], [[B:%.*]]
288
+ ; CHECK-NEXT: [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i32>
289
+ ; CHECK-NEXT: [[A_BC:%.*]] = bitcast <8 x float> [[A]] to <8 x i32>
290
+ ; CHECK-NEXT: [[B_BC:%.*]] = bitcast <8 x float> [[B]] to <8 x i32>
291
+ ; CHECK-NEXT: [[SEXT_LO:%.*]] = shufflevector <8 x i32> [[SEXT]], <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
292
+ ; CHECK-NEXT: [[A_LO:%.*]] = shufflevector <8 x i32> [[A_BC]], <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
293
+ ; CHECK-NEXT: [[B_LO:%.*]] = shufflevector <8 x i32> [[B_BC]], <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
294
+ ; CHECK-NEXT: [[A_LO_BC:%.*]] = bitcast <4 x i32> [[A_LO]] to <16 x i8>
295
+ ; CHECK-NEXT: [[B_LO_BC:%.*]] = bitcast <4 x i32> [[B_LO]] to <16 x i8>
296
+ ; CHECK-NEXT: [[SEXT_LO_BC:%.*]] = bitcast <4 x i32> [[SEXT_LO]] to <16 x i8>
297
+ ; CHECK-NEXT: [[BLENDV:%.*]] = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> [[A_LO_BC]], <16 x i8> [[B_LO_BC]], <16 x i8> [[SEXT_LO_BC]])
298
+ ; CHECK-NEXT: [[RES:%.*]] = bitcast <16 x i8> [[BLENDV]] to <4 x float>
299
+ ; CHECK-NEXT: ret <4 x float> [[RES]]
300
+ ;
301
+ %cmp = fcmp olt <8 x float > %a , %b
302
+ %sext = sext <8 x i1 > %cmp to <8 x i32 >
303
+ %a.bc = bitcast <8 x float > %a to <8 x i32 >
304
+ %b.bc = bitcast <8 x float > %b to <8 x i32 >
305
+ %sext.lo = shufflevector <8 x i32 > %sext , <8 x i32 > poison, <4 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 >
306
+ %a.lo = shufflevector <8 x i32 > %a.bc , <8 x i32 > poison, <4 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 >
307
+ %b.lo = shufflevector <8 x i32 > %b.bc , <8 x i32 > poison, <4 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 >
308
+ %a.lo.bc = bitcast <4 x i32 > %a.lo to <16 x i8 >
309
+ %b.lo.bc = bitcast <4 x i32 > %b.lo to <16 x i8 >
310
+ %sext.lo.bc = bitcast <4 x i32 > %sext.lo to <16 x i8 >
311
+ %blendv = call <16 x i8 > @llvm.x86.sse41.pblendvb (<16 x i8 > %a.lo.bc , <16 x i8 > %b.lo.bc , <16 x i8 > %sext.lo.bc )
312
+ %res = bitcast <16 x i8 > %blendv to <4 x float >
313
+ ret <4 x float > %res
314
+ }
315
+
285
316
; Declarations of the llvm.x86.sse41 blend intrinsics (pblendvb, blendvps,
; blendvpd). Each takes three vectors of one type and returns that same type:
; <16 x i8>, <4 x float> and <2 x double> respectively.
declare <16 x i8 > @llvm.x86.sse41.pblendvb (<16 x i8 >, <16 x i8 >, <16 x i8 >)
286
317
declare <4 x float > @llvm.x86.sse41.blendvps (<4 x float >, <4 x float >, <4 x float >)
287
318
declare <2 x double > @llvm.x86.sse41.blendvpd (<2 x double >, <2 x double >, <2 x double >)
0 commit comments