@@ -435,3 +435,100 @@ define <8 x float> @constant_fold_vpermilvar_ps_256() {
%1 = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, <8 x i32> <i32 4, i32 0, i32 2, i32 1, i32 0, i32 1, i32 1, i32 1>)
ret <8 x float> %1
}
+
+ define void @PR39483() {
+ ; X32-AVX1-LABEL: PR39483:
+ ; X32-AVX1: # %bb.0: # %entry
+ ; X32-AVX1-NEXT: vmovups 32, %ymm0
+ ; X32-AVX1-NEXT: vmovups 64, %ymm1
+ ; X32-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+ ; X32-AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,3]
+ ; X32-AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
+ ; X32-AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],mem[2],ymm0[3,4],mem[5],ymm0[6,7]
+ ; X32-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+ ; X32-AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2,3]
+ ; X32-AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,1,0,3]
+ ; X32-AVX1-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
+ ; X32-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+ ; X32-AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4],ymm1[5,6,7]
+ ; X32-AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
+ ; X32-AVX1-NEXT: vmulps %ymm1, %ymm0, %ymm0
+ ; X32-AVX1-NEXT: vaddps %ymm1, %ymm0, %ymm0
+ ; X32-AVX1-NEXT: vmovups %ymm0, (%eax)
+ ;
+ ; X32-AVX2-LABEL: PR39483:
+ ; X32-AVX2: # %bb.0: # %entry
+ ; X32-AVX2-NEXT: vmovups 32, %ymm0
+ ; X32-AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],mem[2],ymm0[3,4],mem[5],ymm0[6,7]
+ ; X32-AVX2-NEXT: vmovaps {{.*#+}} ymm1 = <2,5,0,3,6,u,u,u>
+ ; X32-AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
+ ; X32-AVX2-NEXT: vpermilps {{.*#+}} ymm1 = mem[0,1,0,3,4,5,4,7]
+ ; X32-AVX2-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,1,0,3]
+ ; X32-AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4],ymm1[5,6,7]
+ ; X32-AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
+ ; X32-AVX2-NEXT: vmulps %ymm1, %ymm0, %ymm0
+ ; X32-AVX2-NEXT: vaddps %ymm1, %ymm0, %ymm0
+ ; X32-AVX2-NEXT: vmovups %ymm0, (%eax)
+ ;
+ ; X32-AVX512-LABEL: PR39483:
+ ; X32-AVX512: # %bb.0: # %entry
+ ; X32-AVX512-NEXT: vmovups 0, %zmm0
+ ; X32-AVX512-NEXT: vmovups 64, %ymm1
+ ; X32-AVX512-NEXT: vmovaps {{.*#+}} zmm2 = <2,5,8,11,14,17,20,23,u,u,u,u,u,u,u,u>
+ ; X32-AVX512-NEXT: vpermi2ps %zmm1, %zmm0, %zmm2
+ ; X32-AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0
+ ; X32-AVX512-NEXT: vmulps %ymm0, %ymm2, %ymm1
+ ; X32-AVX512-NEXT: vaddps %ymm0, %ymm1, %ymm0
+ ; X32-AVX512-NEXT: vmovups %ymm0, (%eax)
+ ;
+ ; X64-AVX1-LABEL: PR39483:
+ ; X64-AVX1: # %bb.0: # %entry
+ ; X64-AVX1-NEXT: vmovups 32, %ymm0
+ ; X64-AVX1-NEXT: vmovups 64, %ymm1
+ ; X64-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+ ; X64-AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,3]
+ ; X64-AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
+ ; X64-AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],mem[2],ymm0[3,4],mem[5],ymm0[6,7]
+ ; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+ ; X64-AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2,3]
+ ; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,1,0,3]
+ ; X64-AVX1-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
+ ; X64-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+ ; X64-AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4],ymm1[5,6,7]
+ ; X64-AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
+ ; X64-AVX1-NEXT: vmulps %ymm1, %ymm0, %ymm0
+ ; X64-AVX1-NEXT: vaddps %ymm1, %ymm0, %ymm0
+ ; X64-AVX1-NEXT: vmovups %ymm0, (%rax)
+ ;
+ ; X64-AVX2-LABEL: PR39483:
+ ; X64-AVX2: # %bb.0: # %entry
+ ; X64-AVX2-NEXT: vmovups 32, %ymm0
+ ; X64-AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],mem[2],ymm0[3,4],mem[5],ymm0[6,7]
+ ; X64-AVX2-NEXT: vmovaps {{.*#+}} ymm1 = <2,5,0,3,6,u,u,u>
+ ; X64-AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
+ ; X64-AVX2-NEXT: vpermilps {{.*#+}} ymm1 = mem[0,1,0,3,4,5,4,7]
+ ; X64-AVX2-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,1,0,3]
+ ; X64-AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4],ymm1[5,6,7]
+ ; X64-AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
+ ; X64-AVX2-NEXT: vmulps %ymm1, %ymm0, %ymm0
+ ; X64-AVX2-NEXT: vaddps %ymm1, %ymm0, %ymm0
+ ; X64-AVX2-NEXT: vmovups %ymm0, (%rax)
+ ;
+ ; X64-AVX512-LABEL: PR39483:
+ ; X64-AVX512: # %bb.0: # %entry
+ ; X64-AVX512-NEXT: vmovups 0, %zmm0
+ ; X64-AVX512-NEXT: vmovups 64, %ymm1
+ ; X64-AVX512-NEXT: vmovaps {{.*#+}} zmm2 = <2,5,8,11,14,17,20,23,u,u,u,u,u,u,u,u>
+ ; X64-AVX512-NEXT: vpermi2ps %zmm1, %zmm0, %zmm2
+ ; X64-AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0
+ ; X64-AVX512-NEXT: vmulps %ymm0, %ymm2, %ymm1
+ ; X64-AVX512-NEXT: vaddps %ymm0, %ymm1, %ymm0
+ ; X64-AVX512-NEXT: vmovups %ymm0, (%rax)
+ entry:
+ %wide.vec = load <24 x float>, <24 x float>* null, align 4
+ %strided.vec18 = shufflevector <24 x float> %wide.vec, <24 x float> undef, <8 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23>
+ %0 = fmul <8 x float> %strided.vec18, zeroinitializer
+ %1 = fadd <8 x float> zeroinitializer, %0
+ store <8 x float> %1, <8 x float>* undef, align 16
+ unreachable
+ }