@@ -1121,12 +1121,7 @@ define <4 x i32> @combine_nested_undef_test28(<4 x i32> %A, <4 x i32> %B) {
1121
1121
define <4 x float > @combine_test1 (<4 x float > %a , <4 x float > %b ) {
1122
1122
; SSE2-LABEL: combine_test1:
1123
1123
; SSE2: # BB#0:
1124
- ; SSE2-NEXT: movaps %xmm1, %xmm2
1125
- ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
1126
- ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
1127
- ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[1,3]
1128
- ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
1129
- ; SSE2-NEXT: movaps %xmm2, %xmm0
1124
+ ; SSE2-NEXT: movaps %xmm1, %xmm0
1130
1125
; SSE2-NEXT: retq
1131
1126
;
1132
1127
; SSSE3-LABEL: combine_test1:
@@ -1248,12 +1243,7 @@ define <4 x float> @combine_test5(<4 x float> %a, <4 x float> %b) {
1248
1243
define <4 x i32 > @combine_test6 (<4 x i32 > %a , <4 x i32 > %b ) {
1249
1244
; SSE2-LABEL: combine_test6:
1250
1245
; SSE2: # BB#0:
1251
- ; SSE2-NEXT: movaps %xmm1, %xmm2
1252
- ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
1253
- ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
1254
- ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[1,3]
1255
- ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
1256
- ; SSE2-NEXT: movaps %xmm2, %xmm0
1246
+ ; SSE2-NEXT: movaps %xmm1, %xmm0
1257
1247
; SSE2-NEXT: retq
1258
1248
;
1259
1249
; SSSE3-LABEL: combine_test6:
@@ -1601,21 +1591,13 @@ define <4 x i32> @combine_test20(<4 x i32> %a, <4 x i32> %b) {
1601
1591
define <4 x float > @combine_test1b (<4 x float > %a , <4 x float > %b ) {
1602
1592
; SSE2-LABEL: combine_test1b:
1603
1593
; SSE2: # BB#0:
1604
- ; SSE2-NEXT: movaps %xmm1, %xmm2
1605
- ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
1606
- ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
1607
- ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,0],xmm2[0,0]
1608
- ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm2[2,0]
1594
+ ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1,2,0]
1609
1595
; SSE2-NEXT: movaps %xmm1, %xmm0
1610
1596
; SSE2-NEXT: retq
1611
1597
;
1612
1598
; SSSE3-LABEL: combine_test1b:
1613
1599
; SSSE3: # BB#0:
1614
- ; SSSE3-NEXT: movaps %xmm1, %xmm2
1615
- ; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
1616
- ; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
1617
- ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,0],xmm2[0,0]
1618
- ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm2[2,0]
1600
+ ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1,2,0]
1619
1601
; SSSE3-NEXT: movaps %xmm1, %xmm0
1620
1602
; SSSE3-NEXT: retq
1621
1603
;
@@ -1637,36 +1619,25 @@ define <4 x float> @combine_test1b(<4 x float> %a, <4 x float> %b) {
1637
1619
define <4 x float > @combine_test2b (<4 x float > %a , <4 x float > %b ) {
1638
1620
; SSE2-LABEL: combine_test2b:
1639
1621
; SSE2: # BB#0:
1640
- ; SSE2-NEXT: movaps %xmm1, %xmm2
1641
- ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
1642
- ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
1643
- ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[1,1]
1644
- ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
1645
- ; SSE2-NEXT: movaps %xmm2, %xmm0
1622
+ ; SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0,0]
1623
+ ; SSE2-NEXT: movaps %xmm1, %xmm0
1646
1624
; SSE2-NEXT: retq
1647
1625
;
1648
1626
; SSSE3-LABEL: combine_test2b:
1649
1627
; SSSE3: # BB#0:
1650
- ; SSSE3-NEXT: movaps %xmm1, %xmm2
1651
- ; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
1652
- ; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
1653
- ; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[1,1]
1654
- ; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
1655
- ; SSSE3-NEXT: movaps %xmm2, %xmm0
1628
+ ; SSSE3-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0,0]
1629
+ ; SSSE3-NEXT: movapd %xmm1, %xmm0
1656
1630
; SSSE3-NEXT: retq
1657
1631
;
1658
1632
; SSE41-LABEL: combine_test2b:
1659
1633
; SSE41: # BB#0:
1660
- ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
1661
- ; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[1,1]
1662
- ; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
1634
+ ; SSE41-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0,0]
1635
+ ; SSE41-NEXT: movapd %xmm1, %xmm0
1663
1636
; SSE41-NEXT: retq
1664
1637
;
1665
1638
; AVX-LABEL: combine_test2b:
1666
1639
; AVX: # BB#0:
1667
- ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
1668
- ; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[1,1]
1669
- ; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
1640
+ ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0,0]
1670
1641
; AVX-NEXT: retq
1671
1642
%1 = shufflevector <4 x float > %a , <4 x float > %b , <4 x i32 > <i32 4 , i32 1 , i32 6 , i32 3 >
1672
1643
%2 = shufflevector <4 x float > %1 , <4 x float > %b , <4 x i32 > <i32 0 , i32 5 , i32 0 , i32 5 >
@@ -1698,21 +1669,13 @@ define <4 x float> @combine_test3b(<4 x float> %a, <4 x float> %b) {
1698
1669
define <4 x float > @combine_test4b (<4 x float > %a , <4 x float > %b ) {
1699
1670
; SSE2-LABEL: combine_test4b:
1700
1671
; SSE2: # BB#0:
1701
- ; SSE2-NEXT: movaps %xmm1, %xmm2
1702
- ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
1703
- ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
1704
- ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[2,0],xmm1[3,0]
1705
- ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm2[0,2]
1672
+ ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
1706
1673
; SSE2-NEXT: movaps %xmm1, %xmm0
1707
1674
; SSE2-NEXT: retq
1708
1675
;
1709
1676
; SSSE3-LABEL: combine_test4b:
1710
1677
; SSSE3: # BB#0:
1711
- ; SSSE3-NEXT: movaps %xmm1, %xmm2
1712
- ; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
1713
- ; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
1714
- ; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[2,0],xmm1[3,0]
1715
- ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm2[0,2]
1678
+ ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
1716
1679
; SSSE3-NEXT: movaps %xmm1, %xmm0
1717
1680
; SSSE3-NEXT: retq
1718
1681
;
@@ -1968,17 +1931,11 @@ define <4 x float> @combine_blend_01(<4 x float> %a, <4 x float> %b) {
1968
1931
; SSE2-LABEL: combine_blend_01:
1969
1932
; SSE2: # BB#0:
1970
1933
; SSE2-NEXT: movsd %xmm1, %xmm0
1971
- ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,0],xmm0[0,0]
1972
- ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3]
1973
- ; SSE2-NEXT: movaps %xmm1, %xmm0
1974
1934
; SSE2-NEXT: retq
1975
1935
;
1976
1936
; SSSE3-LABEL: combine_blend_01:
1977
1937
; SSSE3: # BB#0:
1978
1938
; SSSE3-NEXT: movsd %xmm1, %xmm0
1979
- ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,0],xmm0[0,0]
1980
- ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3]
1981
- ; SSSE3-NEXT: movaps %xmm1, %xmm0
1982
1939
; SSSE3-NEXT: retq
1983
1940
;
1984
1941
; SSE41-LABEL: combine_blend_01:
@@ -2113,16 +2070,12 @@ define <4 x i32> @combine_test_movhl_3(<4 x i32> %a, <4 x i32> %b) {
2113
2070
define <4 x float > @combine_undef_input_test1 (<4 x float > %a , <4 x float > %b ) {
2114
2071
; SSE2-LABEL: combine_undef_input_test1:
2115
2072
; SSE2: # BB#0:
2116
- ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,3,1]
2117
- ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[1,2]
2118
- ; SSE2-NEXT: movaps %xmm1, %xmm0
2073
+ ; SSE2-NEXT: movsd %xmm1, %xmm0
2119
2074
; SSE2-NEXT: retq
2120
2075
;
2121
2076
; SSSE3-LABEL: combine_undef_input_test1:
2122
2077
; SSSE3: # BB#0:
2123
- ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,3,1]
2124
- ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[1,2]
2125
- ; SSSE3-NEXT: movaps %xmm1, %xmm0
2078
+ ; SSSE3-NEXT: movsd %xmm1, %xmm0
2126
2079
; SSSE3-NEXT: retq
2127
2080
;
2128
2081
; SSE41-LABEL: combine_undef_input_test1:
@@ -2302,16 +2255,12 @@ define <4 x float> @combine_undef_input_test10(<4 x float> %a) {
2302
2255
define <4 x float > @combine_undef_input_test11 (<4 x float > %a , <4 x float > %b ) {
2303
2256
; SSE2-LABEL: combine_undef_input_test11:
2304
2257
; SSE2: # BB#0:
2305
- ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,3,1]
2306
- ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[1,2]
2307
- ; SSE2-NEXT: movaps %xmm1, %xmm0
2258
+ ; SSE2-NEXT: movsd %xmm1, %xmm0
2308
2259
; SSE2-NEXT: retq
2309
2260
;
2310
2261
; SSSE3-LABEL: combine_undef_input_test11:
2311
2262
; SSSE3: # BB#0:
2312
- ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,3,1]
2313
- ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[1,2]
2314
- ; SSSE3-NEXT: movaps %xmm1, %xmm0
2263
+ ; SSSE3-NEXT: movsd %xmm1, %xmm0
2315
2264
; SSSE3-NEXT: retq
2316
2265
;
2317
2266
; SSE41-LABEL: combine_undef_input_test11:
0 commit comments