@@ -1527,30 +1527,13 @@ define void @addus_v8i8(<8 x i8>* %p1, <8 x i8>* %p2) {
 ; SSE-NEXT: movq %xmm1, (%rdi)
 ; SSE-NEXT: retq
 ;
-; AVX1-LABEL: addus_v8i8:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
-; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
-; AVX1-NEXT: vpaddusb %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vmovq %xmm0, (%rdi)
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: addus_v8i8:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
-; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
-; AVX2-NEXT: vpaddusb %xmm0, %xmm1, %xmm0
-; AVX2-NEXT: vmovq %xmm0, (%rdi)
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: addus_v8i8:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
-; AVX512-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
-; AVX512-NEXT: vpaddusb %xmm0, %xmm1, %xmm0
-; AVX512-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX512-NEXT: vpmovwb %xmm0, (%rdi)
-; AVX512-NEXT: retq
+; AVX-LABEL: addus_v8i8:
+; AVX: # %bb.0:
+; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
+; AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
+; AVX-NEXT: vpaddusb %xmm0, %xmm1, %xmm0
+; AVX-NEXT: vmovq %xmm0, (%rdi)
+; AVX-NEXT: retq
   %ld1 = load <8 x i8>, <8 x i8>* %p1, align 8
   %ld2 = load <8 x i8>, <8 x i8>* %p2, align 8
   %1 = add <8 x i8> %ld2, %ld1
@@ -1569,30 +1552,13 @@ define void @addus_v4i8(<4 x i8>* %p1, <4 x i8>* %p2) {
 ; SSE-NEXT: movd %xmm1, (%rdi)
 ; SSE-NEXT: retq
 ;
-; AVX1-LABEL: addus_v4i8:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX1-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX1-NEXT: vpaddusb %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vmovd %xmm0, (%rdi)
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: addus_v4i8:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX2-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX2-NEXT: vpaddusb %xmm0, %xmm1, %xmm0
-; AVX2-NEXT: vmovd %xmm0, (%rdi)
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: addus_v4i8:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX512-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX512-NEXT: vpaddusb %xmm0, %xmm1, %xmm0
-; AVX512-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; AVX512-NEXT: vpmovdb %xmm0, (%rdi)
-; AVX512-NEXT: retq
+; AVX-LABEL: addus_v4i8:
+; AVX: # %bb.0:
+; AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; AVX-NEXT: vpaddusb %xmm0, %xmm1, %xmm0
+; AVX-NEXT: vmovd %xmm0, (%rdi)
+; AVX-NEXT: retq
   %ld1 = load <4 x i8>, <4 x i8>* %p1, align 4
   %ld2 = load <4 x i8>, <4 x i8>* %p2, align 4
   %1 = add <4 x i8> %ld2, %ld1
@@ -1635,36 +1601,15 @@ define void @addus_v2i8(<2 x i8>* %p1, <2 x i8>* %p2) {
 ; SSE41-NEXT: pextrw $0, %xmm1, (%rdi)
 ; SSE41-NEXT: retq
 ;
-; AVX1-LABEL: addus_v2i8:
-; AVX1: # %bb.0:
-; AVX1-NEXT: movzwl (%rdi), %eax
-; AVX1-NEXT: vmovd %eax, %xmm0
-; AVX1-NEXT: movzwl (%rsi), %eax
-; AVX1-NEXT: vmovd %eax, %xmm1
-; AVX1-NEXT: vpaddusb %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vpextrw $0, %xmm0, (%rdi)
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: addus_v2i8:
-; AVX2: # %bb.0:
-; AVX2-NEXT: movzwl (%rdi), %eax
-; AVX2-NEXT: vmovd %eax, %xmm0
-; AVX2-NEXT: movzwl (%rsi), %eax
-; AVX2-NEXT: vmovd %eax, %xmm1
-; AVX2-NEXT: vpaddusb %xmm0, %xmm1, %xmm0
-; AVX2-NEXT: vpextrw $0, %xmm0, (%rdi)
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: addus_v2i8:
-; AVX512: # %bb.0:
-; AVX512-NEXT: movzwl (%rdi), %eax
-; AVX512-NEXT: vmovd %eax, %xmm0
-; AVX512-NEXT: movzwl (%rsi), %eax
-; AVX512-NEXT: vmovd %eax, %xmm1
-; AVX512-NEXT: vpaddusb %xmm0, %xmm1, %xmm0
-; AVX512-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512-NEXT: vpmovqb %xmm0, (%rdi)
-; AVX512-NEXT: retq
+; AVX-LABEL: addus_v2i8:
+; AVX: # %bb.0:
+; AVX-NEXT: movzwl (%rdi), %eax
+; AVX-NEXT: vmovd %eax, %xmm0
+; AVX-NEXT: movzwl (%rsi), %eax
+; AVX-NEXT: vmovd %eax, %xmm1
+; AVX-NEXT: vpaddusb %xmm0, %xmm1, %xmm0
+; AVX-NEXT: vpextrw $0, %xmm0, (%rdi)
+; AVX-NEXT: retq
   %ld1 = load <2 x i8>, <2 x i8>* %p1, align 2
   %ld2 = load <2 x i8>, <2 x i8>* %p2, align 2
   %1 = add <2 x i8> %ld2, %ld1
@@ -1683,30 +1628,13 @@ define void @addus_v4i16(<4 x i16>* %p1, <4 x i16>* %p2) {
 ; SSE-NEXT: movq %xmm1, (%rdi)
 ; SSE-NEXT: retq
 ;
-; AVX1-LABEL: addus_v4i16:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
-; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
-; AVX1-NEXT: vpaddusw %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vmovq %xmm0, (%rdi)
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: addus_v4i16:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
-; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
-; AVX2-NEXT: vpaddusw %xmm0, %xmm1, %xmm0
-; AVX2-NEXT: vmovq %xmm0, (%rdi)
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: addus_v4i16:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
-; AVX512-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
-; AVX512-NEXT: vpaddusw %xmm0, %xmm1, %xmm0
-; AVX512-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; AVX512-NEXT: vpmovdw %xmm0, (%rdi)
-; AVX512-NEXT: retq
+; AVX-LABEL: addus_v4i16:
+; AVX: # %bb.0:
+; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
+; AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
+; AVX-NEXT: vpaddusw %xmm0, %xmm1, %xmm0
+; AVX-NEXT: vmovq %xmm0, (%rdi)
+; AVX-NEXT: retq
   %ld1 = load <4 x i16>, <4 x i16>* %p1, align 4
   %ld2 = load <4 x i16>, <4 x i16>* %p2, align 4
   %1 = add <4 x i16> %ld2, %ld1
@@ -1725,30 +1653,13 @@ define void @addus_v2i16(<2 x i16>* %p1, <2 x i16>* %p2) {
 ; SSE-NEXT: movd %xmm1, (%rdi)
 ; SSE-NEXT: retq
 ;
-; AVX1-LABEL: addus_v2i16:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX1-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX1-NEXT: vpaddusw %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vmovd %xmm0, (%rdi)
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: addus_v2i16:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX2-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX2-NEXT: vpaddusw %xmm0, %xmm1, %xmm0
-; AVX2-NEXT: vmovd %xmm0, (%rdi)
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: addus_v2i16:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX512-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX512-NEXT: vpaddusw %xmm0, %xmm1, %xmm0
-; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
-; AVX512-NEXT: vpmovqw %xmm0, (%rdi)
-; AVX512-NEXT: retq
+; AVX-LABEL: addus_v2i16:
+; AVX: # %bb.0:
+; AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; AVX-NEXT: vpaddusw %xmm0, %xmm1, %xmm0
+; AVX-NEXT: vmovd %xmm0, (%rdi)
+; AVX-NEXT: retq
   %ld1 = load <2 x i16>, <2 x i16>* %p1, align 2
   %ld2 = load <2 x i16>, <2 x i16>* %p2, align 2
   %1 = add <2 x i16> %ld2, %ld1