@@ -1826,45 +1826,40 @@ define void @splat4_v8i32_load_store(<8 x i32>* %s, <32 x i32>* %d) {
define void @splat4_v4f64_load_store(<4 x double>* %s, <16 x double>* %d) {
; AVX1-LABEL: splat4_v4f64_load_store:
; AVX1: # %bb.0:
- ; AVX1-NEXT: vmovupd (%rdi), %ymm0
- ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[0,1,0,1]
- ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
- ; AVX1-NEXT: vmovddup {{.*#+}} ymm2 = ymm1[0,0,2,2]
- ; AVX1-NEXT: vmovddup {{.*#+}} ymm3 = ymm0[0,0,2,2]
- ; AVX1-NEXT: vpermilpd {{.*#+}} ymm1 = ymm1[1,1,3,3]
- ; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,3]
- ; AVX1-NEXT: vmovupd %ymm0, 96(%rsi)
- ; AVX1-NEXT: vmovupd %ymm3, 64(%rsi)
- ; AVX1-NEXT: vmovupd %ymm1, 32(%rsi)
- ; AVX1-NEXT: vmovupd %ymm2, (%rsi)
+ ; AVX1-NEXT: vbroadcastsd (%rdi), %ymm0
+ ; AVX1-NEXT: vbroadcastsd 16(%rdi), %ymm1
+ ; AVX1-NEXT: vbroadcastsd 8(%rdi), %ymm2
+ ; AVX1-NEXT: vbroadcastsd 24(%rdi), %ymm3
+ ; AVX1-NEXT: vmovups %ymm3, 96(%rsi)
+ ; AVX1-NEXT: vmovups %ymm1, 64(%rsi)
+ ; AVX1-NEXT: vmovups %ymm2, 32(%rsi)
+ ; AVX1-NEXT: vmovups %ymm0, (%rsi)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: splat4_v4f64_load_store:
; AVX2: # %bb.0:
- ; AVX2-NEXT: vmovups (%rdi), %ymm0
- ; AVX2-NEXT: vbroadcastsd (%rdi), %ymm1
- ; AVX2-NEXT: vpermpd {{.*#+}} ymm2 = ymm0[2,2,2,2]
- ; AVX2-NEXT: vpermpd {{.*#+}} ymm3 = ymm0[1,1,1,1]
- ; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,3]
- ; AVX2-NEXT: vmovups %ymm0, 96(%rsi)
- ; AVX2-NEXT: vmovups %ymm2, 64(%rsi)
- ; AVX2-NEXT: vmovups %ymm3, 32(%rsi)
- ; AVX2-NEXT: vmovups %ymm1, (%rsi)
+ ; AVX2-NEXT: vbroadcastsd (%rdi), %ymm0
+ ; AVX2-NEXT: vbroadcastsd 16(%rdi), %ymm1
+ ; AVX2-NEXT: vbroadcastsd 8(%rdi), %ymm2
+ ; AVX2-NEXT: vbroadcastsd 24(%rdi), %ymm3
+ ; AVX2-NEXT: vmovups %ymm3, 96(%rsi)
+ ; AVX2-NEXT: vmovups %ymm1, 64(%rsi)
+ ; AVX2-NEXT: vmovups %ymm2, 32(%rsi)
+ ; AVX2-NEXT: vmovups %ymm0, (%rsi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: splat4_v4f64_load_store:
; AVX512: # %bb.0:
- ; AVX512-NEXT: vmovups (%rdi), %ymm0
- ; AVX512-NEXT: vbroadcastsd (%rdi), %ymm1
- ; AVX512-NEXT: vpermpd {{.*#+}} ymm2 = ymm0[2,2,2,2]
- ; AVX512-NEXT: vpermpd {{.*#+}} ymm3 = ymm0[1,1,1,1]
- ; AVX512-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,3]
+ ; AVX512-NEXT: vbroadcastsd (%rdi), %ymm0
+ ; AVX512-NEXT: vbroadcastsd 16(%rdi), %ymm1
+ ; AVX512-NEXT: vbroadcastsd 8(%rdi), %ymm2
+ ; AVX512-NEXT: vbroadcastsd 24(%rdi), %ymm3
+ ; AVX512-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512-NEXT: vinsertf64x4 $1, %ymm3, %zmm1, %zmm1
- ; AVX512-NEXT: vinsertf64x4 $1, %ymm0, %zmm2, %zmm0
- ; AVX512-NEXT: vmovups %zmm0, 64(%rsi)
- ; AVX512-NEXT: vmovups %zmm1, (%rsi)
+ ; AVX512-NEXT: vmovups %zmm1, 64(%rsi)
+ ; AVX512-NEXT: vmovups %zmm0, (%rsi)
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%x = load <4 x double>, <4 x double>* %s, align 8
@@ -1878,45 +1873,40 @@ define void @splat4_v4f64_load_store(<4 x double>* %s, <16 x double>* %d) {
define void @splat4_v4i64_load_store(<4 x i64>* %s, <16 x i64>* %d) {
; AVX1-LABEL: splat4_v4i64_load_store:
; AVX1: # %bb.0:
- ; AVX1-NEXT: vmovupd (%rdi), %ymm0
- ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[0,1,0,1]
- ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
- ; AVX1-NEXT: vmovddup {{.*#+}} ymm2 = ymm1[0,0,2,2]
- ; AVX1-NEXT: vmovddup {{.*#+}} ymm3 = ymm0[0,0,2,2]
- ; AVX1-NEXT: vpermilpd {{.*#+}} ymm1 = ymm1[1,1,3,3]
- ; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,3]
- ; AVX1-NEXT: vmovupd %ymm0, 96(%rsi)
- ; AVX1-NEXT: vmovupd %ymm3, 64(%rsi)
- ; AVX1-NEXT: vmovupd %ymm1, 32(%rsi)
- ; AVX1-NEXT: vmovupd %ymm2, (%rsi)
+ ; AVX1-NEXT: vbroadcastsd (%rdi), %ymm0
+ ; AVX1-NEXT: vbroadcastsd 16(%rdi), %ymm1
+ ; AVX1-NEXT: vbroadcastsd 8(%rdi), %ymm2
+ ; AVX1-NEXT: vbroadcastsd 24(%rdi), %ymm3
+ ; AVX1-NEXT: vmovups %ymm3, 96(%rsi)
+ ; AVX1-NEXT: vmovups %ymm1, 64(%rsi)
+ ; AVX1-NEXT: vmovups %ymm2, 32(%rsi)
+ ; AVX1-NEXT: vmovups %ymm0, (%rsi)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: splat4_v4i64_load_store:
; AVX2: # %bb.0:
- ; AVX2-NEXT: vmovups (%rdi), %ymm0
- ; AVX2-NEXT: vbroadcastsd (%rdi), %ymm1
- ; AVX2-NEXT: vpermpd {{.*#+}} ymm2 = ymm0[2,2,2,2]
- ; AVX2-NEXT: vpermpd {{.*#+}} ymm3 = ymm0[1,1,1,1]
- ; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,3]
- ; AVX2-NEXT: vmovups %ymm0, 96(%rsi)
- ; AVX2-NEXT: vmovups %ymm2, 64(%rsi)
- ; AVX2-NEXT: vmovups %ymm3, 32(%rsi)
- ; AVX2-NEXT: vmovups %ymm1, (%rsi)
+ ; AVX2-NEXT: vbroadcastsd (%rdi), %ymm0
+ ; AVX2-NEXT: vbroadcastsd 16(%rdi), %ymm1
+ ; AVX2-NEXT: vbroadcastsd 8(%rdi), %ymm2
+ ; AVX2-NEXT: vbroadcastsd 24(%rdi), %ymm3
+ ; AVX2-NEXT: vmovups %ymm3, 96(%rsi)
+ ; AVX2-NEXT: vmovups %ymm1, 64(%rsi)
+ ; AVX2-NEXT: vmovups %ymm2, 32(%rsi)
+ ; AVX2-NEXT: vmovups %ymm0, (%rsi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: splat4_v4i64_load_store:
; AVX512: # %bb.0:
- ; AVX512-NEXT: vmovups (%rdi), %ymm0
- ; AVX512-NEXT: vbroadcastsd (%rdi), %ymm1
- ; AVX512-NEXT: vpermpd {{.*#+}} ymm2 = ymm0[2,2,2,2]
- ; AVX512-NEXT: vpermpd {{.*#+}} ymm3 = ymm0[1,1,1,1]
- ; AVX512-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,3]
+ ; AVX512-NEXT: vbroadcastsd (%rdi), %ymm0
+ ; AVX512-NEXT: vbroadcastsd 16(%rdi), %ymm1
+ ; AVX512-NEXT: vbroadcastsd 8(%rdi), %ymm2
+ ; AVX512-NEXT: vbroadcastsd 24(%rdi), %ymm3
+ ; AVX512-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512-NEXT: vinsertf64x4 $1, %ymm3, %zmm1, %zmm1
- ; AVX512-NEXT: vinsertf64x4 $1, %ymm0, %zmm2, %zmm0
- ; AVX512-NEXT: vmovups %zmm0, 64(%rsi)
- ; AVX512-NEXT: vmovups %zmm1, (%rsi)
+ ; AVX512-NEXT: vmovups %zmm1, 64(%rsi)
+ ; AVX512-NEXT: vmovups %zmm0, (%rsi)
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%x = load <4 x i64>, <4 x i64>* %s, align 8