@@ -1592,3 +1592,89 @@ entry:
%1 = bitcast <8 x i8> %0 to i64
ret i64 %1
}
+
+ define void @foo(<4 x i64> %a, <4 x i64> %b, ptr %p) "min-legal-vector-width"="256" "prefer-vector-width"="256" {
+ ; SSE-LABEL: foo:
+ ; SSE: # %bb.0: # %entry
+ ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
+ ; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
+ ; SSE-NEXT: movaps %xmm2, 16(%rdi)
+ ; SSE-NEXT: movaps %xmm0, (%rdi)
+ ; SSE-NEXT: retq
+ ;
+ ; AVX1-LABEL: foo:
+ ; AVX1: # %bb.0: # %entry
+ ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
+ ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+ ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
+ ; AVX1-NEXT: vmovups %ymm0, (%rdi)
+ ; AVX1-NEXT: vzeroupper
+ ; AVX1-NEXT: retq
+ ;
+ ; AVX2-SLOW-LABEL: foo:
+ ; AVX2-SLOW: # %bb.0: # %entry
+ ; AVX2-SLOW-NEXT: vextractf128 $1, %ymm0, %xmm2
+ ; AVX2-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2]
+ ; AVX2-SLOW-NEXT: vextractf128 $1, %ymm1, %xmm2
+ ; AVX2-SLOW-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2]
+ ; AVX2-SLOW-NEXT: vmovaps %xmm1, 16(%rdi)
+ ; AVX2-SLOW-NEXT: vmovaps %xmm0, (%rdi)
+ ; AVX2-SLOW-NEXT: vzeroupper
+ ; AVX2-SLOW-NEXT: retq
+ ;
+ ; AVX2-FAST-ALL-LABEL: foo:
+ ; AVX2-FAST-ALL: # %bb.0: # %entry
+ ; AVX2-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7]
+ ; AVX2-FAST-ALL-NEXT: vpermps %ymm0, %ymm2, %ymm0
+ ; AVX2-FAST-ALL-NEXT: vpermps %ymm1, %ymm2, %ymm1
+ ; AVX2-FAST-ALL-NEXT: vmovaps %xmm1, 16(%rdi)
+ ; AVX2-FAST-ALL-NEXT: vmovaps %xmm0, (%rdi)
+ ; AVX2-FAST-ALL-NEXT: vzeroupper
+ ; AVX2-FAST-ALL-NEXT: retq
+ ;
+ ; AVX2-FAST-PERLANE-LABEL: foo:
+ ; AVX2-FAST-PERLANE: # %bb.0: # %entry
+ ; AVX2-FAST-PERLANE-NEXT: vextractf128 $1, %ymm0, %xmm2
+ ; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2]
+ ; AVX2-FAST-PERLANE-NEXT: vextractf128 $1, %ymm1, %xmm2
+ ; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2]
+ ; AVX2-FAST-PERLANE-NEXT: vmovaps %xmm1, 16(%rdi)
+ ; AVX2-FAST-PERLANE-NEXT: vmovaps %xmm0, (%rdi)
+ ; AVX2-FAST-PERLANE-NEXT: vzeroupper
+ ; AVX2-FAST-PERLANE-NEXT: retq
+ ;
+ ; AVX512F-LABEL: foo:
+ ; AVX512F: # %bb.0: # %entry
+ ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+ ; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+ ; AVX512F-NEXT: vpmovqd %zmm0, (%rdi)
+ ; AVX512F-NEXT: vzeroupper
+ ; AVX512F-NEXT: retq
+ ;
+ ; AVX512VL-LABEL: foo:
+ ; AVX512VL: # %bb.0: # %entry
+ ; AVX512VL-NEXT: vpmovqd %ymm1, 16(%rdi)
+ ; AVX512VL-NEXT: vpmovqd %ymm0, (%rdi)
+ ; AVX512VL-NEXT: vzeroupper
+ ; AVX512VL-NEXT: retq
+ ;
+ ; AVX512BW-LABEL: foo:
+ ; AVX512BW: # %bb.0: # %entry
+ ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+ ; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+ ; AVX512BW-NEXT: vpmovqd %zmm0, (%rdi)
+ ; AVX512BW-NEXT: vzeroupper
+ ; AVX512BW-NEXT: retq
+ ;
+ ; AVX512BWVL-LABEL: foo:
+ ; AVX512BWVL: # %bb.0: # %entry
+ ; AVX512BWVL-NEXT: vpmovqd %ymm1, 16(%rdi)
+ ; AVX512BWVL-NEXT: vpmovqd %ymm0, (%rdi)
+ ; AVX512BWVL-NEXT: vzeroupper
+ ; AVX512BWVL-NEXT: retq
+ entry:
+ %0 = shufflevector <4 x i64> %a, <4 x i64> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %1 = trunc nsw <8 x i64> %0 to <8 x i32>
+ store <8 x i32> %1, ptr %p, align 16
+ ret void
+ }
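For reference, a test like this is driven by llc RUN lines at the top of the file, which fall outside this hunk. The checks above show that with AVX512VL the <8 x i64> to <8 x i32> truncating store folds into two vpmovqd stores, while the SSE/AVX1/AVX2 configurations fall back to shuffle sequences. A minimal sketch of one such RUN line, assuming an x86-64 triple and the AVX512VL prefix; the real file's -mattr strings and check-prefix lists may differ:

; Hypothetical RUN line, not taken from this file:
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefixes=AVX512VL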