@@ -379,3 +379,97 @@ define <4 x float> @knownbits_lshr_bitcast_shuffle_uitofp(<2 x i64> %a0, <4 x i3
379
379
%4 = uitofp <4 x i32 > %3 to <4 x float >
380
380
ret <4 x float > %4
381
381
}
382
+
383
; Test: signed min (pminsd) followed by signed max (pmaxsd), both against
; constant vectors, then a <0,0,3,3> shuffle and a uitofp conversion.
; Only lanes 0 and 3 survive the shuffle; for those lanes the pminsd constant
; is 0 and the pmaxsd constants are 65535 (lane 0) and 131071 (lane 3).
; The X32/X64 expectations below list vpminsd, vpmaxsd, vpshufd and then a
; vpblendw/vpsrld/vpblendw/vaddps/vaddps sequence for the conversion.
; NOTE(review): presumably these checks are a baseline to be tightened once
; known-bits analysis handles smin/smax -- confirm against the commit intent.
+ define <4 x float > @knownbits_smax_smin_shuffle_uitofp (<4 x i32 > %a0 ) {
384
+ ; X32-LABEL: knownbits_smax_smin_shuffle_uitofp:
385
+ ; X32: # BB#0:
386
+ ; X32-NEXT: vpminsd {{\.LCPI.*}}, %xmm0, %xmm0
387
+ ; X32-NEXT: vpmaxsd {{\.LCPI.*}}, %xmm0, %xmm0
388
+ ; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
389
+ ; X32-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
390
+ ; X32-NEXT: vpsrld $16, %xmm0, %xmm0
391
+ ; X32-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
392
+ ; X32-NEXT: vaddps {{\.LCPI.*}}, %xmm0, %xmm0
393
+ ; X32-NEXT: vaddps %xmm0, %xmm1, %xmm0
394
+ ; X32-NEXT: retl
395
+ ;
396
+ ; X64-LABEL: knownbits_smax_smin_shuffle_uitofp:
397
+ ; X64: # BB#0:
398
+ ; X64-NEXT: vpminsd {{.*}}(%rip), %xmm0, %xmm0
399
+ ; X64-NEXT: vpmaxsd {{.*}}(%rip), %xmm0, %xmm0
400
+ ; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
401
+ ; X64-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
402
+ ; X64-NEXT: vpsrld $16, %xmm0, %xmm0
403
+ ; X64-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
404
+ ; X64-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
405
+ ; X64-NEXT: vaddps %xmm0, %xmm1, %xmm0
406
+ ; X64-NEXT: retq
; IR under test: %1 = pminsd(%a0, C0), %2 = pmaxsd(%1, C1), then lanes 0 and 3
; of %2 are duplicated by the shuffle and converted with uitofp.
407
+ %1 = call <4 x i32 > @llvm.x86.sse41.pminsd (<4 x i32 > %a0 , <4 x i32 > <i32 0 , i32 -65535 , i32 -65535 , i32 0 >)
408
+ %2 = call <4 x i32 > @llvm.x86.sse41.pmaxsd (<4 x i32 > %1 , <4 x i32 > <i32 65535 , i32 -1 , i32 -1 , i32 131071 >)
409
+ %3 = shufflevector <4 x i32 > %2 , <4 x i32 > undef , <4 x i32 > <i32 0 , i32 0 , i32 3 , i32 3 >
410
+ %4 = uitofp <4 x i32 > %3 to <4 x float >
411
+ ret <4 x float > %4
412
+ }
; Declarations of the SSE4.1 signed min/max intrinsics called by the test above.
413
+ declare <4 x i32 > @llvm.x86.sse41.pminsd (<4 x i32 >, <4 x i32 >) nounwind readnone
414
+ declare <4 x i32 > @llvm.x86.sse41.pmaxsd (<4 x i32 >, <4 x i32 >) nounwind readnone
415
+
416
; Test: unsigned max (pmaxud) against a constant vector, then a <0,0,3,3>
; shuffle and a uitofp conversion.
; Only lanes 0 and 3 survive the shuffle; their pmaxud constants are 65535
; (lane 0) and 262143 (lane 3). Lanes 1 and 2 use -1 and are discarded.
; The X32/X64 expectations below list vpmaxud, vpshufd and then a
; vpblendw/vpsrld/vpblendw/vaddps/vaddps sequence for the conversion.
; NOTE(review): presumably a baseline for later known-bits support of umax --
; confirm against the commit intent.
+ define <4 x float > @knownbits_umax_shuffle_uitofp (<4 x i32 > %a0 ) {
417
+ ; X32-LABEL: knownbits_umax_shuffle_uitofp:
418
+ ; X32: # BB#0:
419
+ ; X32-NEXT: vpmaxud {{\.LCPI.*}}, %xmm0, %xmm0
420
+ ; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
421
+ ; X32-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
422
+ ; X32-NEXT: vpsrld $16, %xmm0, %xmm0
423
+ ; X32-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
424
+ ; X32-NEXT: vaddps {{\.LCPI.*}}, %xmm0, %xmm0
425
+ ; X32-NEXT: vaddps %xmm0, %xmm1, %xmm0
426
+ ; X32-NEXT: retl
427
+ ;
428
+ ; X64-LABEL: knownbits_umax_shuffle_uitofp:
429
+ ; X64: # BB#0:
430
+ ; X64-NEXT: vpmaxud {{.*}}(%rip), %xmm0, %xmm0
431
+ ; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
432
+ ; X64-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
433
+ ; X64-NEXT: vpsrld $16, %xmm0, %xmm0
434
+ ; X64-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
435
+ ; X64-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
436
+ ; X64-NEXT: vaddps %xmm0, %xmm1, %xmm0
437
+ ; X64-NEXT: retq
; IR under test: %1 = pmaxud(%a0, C), then lanes 0 and 3 of %1 are duplicated
; by the shuffle and converted with uitofp.
438
+ %1 = call <4 x i32 > @llvm.x86.sse41.pmaxud (<4 x i32 > %a0 , <4 x i32 > <i32 65535 , i32 -1 , i32 -1 , i32 262143 >)
439
+ %2 = shufflevector <4 x i32 > %1 , <4 x i32 > undef , <4 x i32 > <i32 0 , i32 0 , i32 3 , i32 3 >
440
+ %3 = uitofp <4 x i32 > %2 to <4 x float >
441
+ ret <4 x float > %3
442
+ }
; Declaration of the SSE4.1 unsigned max intrinsic called by the test above.
443
+ declare <4 x i32 > @llvm.x86.sse41.pmaxud (<4 x i32 >, <4 x i32 >) nounwind readnone
444
+
445
; Test: unsigned min (pminud) against a constant vector, a left shift of every
; lane by 16, then a <0,0,3,3> shuffle and a uitofp conversion.
; Only lanes 0 and 3 survive the shuffle; their pminud constants are 65536
; (lane 0) and 262143 (lane 3). Lanes 1 and 2 use -1 and are discarded.
; The X32/X64 expectations below list vpminud, vpslld $16, vpshufd and then a
; vpblendw/vpsrld/vpblendw/vaddps/vaddps sequence for the conversion.
; NOTE(review): lane 0 uses 65536 here while the sibling umax test uses 65535
; -- confirm the value is intentional.
+ define <4 x float > @knownbits_umin_shl_shuffle_uitofp (<4 x i32 > %a0 ) {
446
+ ; X32-LABEL: knownbits_umin_shl_shuffle_uitofp:
447
+ ; X32: # BB#0:
448
+ ; X32-NEXT: vpminud {{\.LCPI.*}}, %xmm0, %xmm0
449
+ ; X32-NEXT: vpslld $16, %xmm0, %xmm0
450
+ ; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
451
+ ; X32-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
452
+ ; X32-NEXT: vpsrld $16, %xmm0, %xmm0
453
+ ; X32-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
454
+ ; X32-NEXT: vaddps {{\.LCPI.*}}, %xmm0, %xmm0
455
+ ; X32-NEXT: vaddps %xmm0, %xmm1, %xmm0
456
+ ; X32-NEXT: retl
457
+ ;
458
+ ; X64-LABEL: knownbits_umin_shl_shuffle_uitofp:
459
+ ; X64: # BB#0:
460
+ ; X64-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm0
461
+ ; X64-NEXT: vpslld $16, %xmm0, %xmm0
462
+ ; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
463
+ ; X64-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
464
+ ; X64-NEXT: vpsrld $16, %xmm0, %xmm0
465
+ ; X64-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
466
+ ; X64-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
467
+ ; X64-NEXT: vaddps %xmm0, %xmm1, %xmm0
468
+ ; X64-NEXT: retq
; IR under test: %1 = pminud(%a0, C), %2 = %1 << 16 per lane, then lanes 0 and
; 3 of %2 are duplicated by the shuffle and converted with uitofp.
469
+ %1 = call <4 x i32 > @llvm.x86.sse41.pminud (<4 x i32 > %a0 , <4 x i32 > <i32 65536 , i32 -1 , i32 -1 , i32 262143 >)
470
+ %2 = shl <4 x i32 > %1 , <i32 16 , i32 16 , i32 16 , i32 16 >
471
+ %3 = shufflevector <4 x i32 > %2 , <4 x i32 > undef , <4 x i32 > <i32 0 , i32 0 , i32 3 , i32 3 >
472
+ %4 = uitofp <4 x i32 > %3 to <4 x float >
473
+ ret <4 x float > %4
474
+ }
; Declaration of the SSE4.1 unsigned min intrinsic called by the test above.
475
+ declare <4 x i32 > @llvm.x86.sse41.pminud (<4 x i32 >, <4 x i32 >) nounwind readnone
0 commit comments