@@ -1808,6 +1808,49 @@ define <4 x double> @broadcast_v4f64_0000_from_v2i64(<2 x i64> %a0) {
1808
1808
ret <4 x double > %3
1809
1809
}
1810
1810
1811
+ ; PR114959 - interleave two unaligned <2 x double> loads (at %src and %src+32) into a <4 x double> ordered lo[0], hi[0], lo[1], hi[1]
1812
+ define <4 x double > @concat_v4f64_0213_broadcasts (ptr %src ) {
1813
+ ; AVX1OR2-LABEL: concat_v4f64_0213_broadcasts:
1814
+ ; AVX1OR2: # %bb.0:
1815
+ ; AVX1OR2-NEXT: vmovups (%rdi), %xmm0
1816
+ ; AVX1OR2-NEXT: vmovups 32(%rdi), %xmm1
1817
+ ; AVX1OR2-NEXT: vmovlhps {{.*#+}} xmm2 = xmm0[0],xmm1[0]
1818
+ ; AVX1OR2-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
1819
+ ; AVX1OR2-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
1820
+ ; AVX1OR2-NEXT: retq
1821
+ ;
1822
+ ; AVX512VL-SLOW-LABEL: concat_v4f64_0213_broadcasts:
1823
+ ; AVX512VL-SLOW: # %bb.0:
1824
+ ; AVX512VL-SLOW-NEXT: vmovups (%rdi), %xmm0
1825
+ ; AVX512VL-SLOW-NEXT: vmovups 32(%rdi), %xmm1
1826
+ ; AVX512VL-SLOW-NEXT: vmovlhps {{.*#+}} xmm2 = xmm0[0],xmm1[0]
1827
+ ; AVX512VL-SLOW-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
1828
+ ; AVX512VL-SLOW-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
1829
+ ; AVX512VL-SLOW-NEXT: retq
1830
+ ;
1831
+ ; AVX512VL-FAST-ALL-LABEL: concat_v4f64_0213_broadcasts:
1832
+ ; AVX512VL-FAST-ALL: # %bb.0:
1833
+ ; AVX512VL-FAST-ALL-NEXT: vmovupd (%rdi), %xmm1
1834
+ ; AVX512VL-FAST-ALL-NEXT: vmovupd 32(%rdi), %xmm2
1835
+ ; AVX512VL-FAST-ALL-NEXT: vmovapd {{.*#+}} ymm0 = [0,4,1,5]
1836
+ ; AVX512VL-FAST-ALL-NEXT: vpermi2pd %ymm2, %ymm1, %ymm0
1837
+ ; AVX512VL-FAST-ALL-NEXT: retq
1838
+ ;
1839
+ ; AVX512VL-FAST-PERLANE-LABEL: concat_v4f64_0213_broadcasts:
1840
+ ; AVX512VL-FAST-PERLANE: # %bb.0:
1841
+ ; AVX512VL-FAST-PERLANE-NEXT: vmovups (%rdi), %xmm0
1842
+ ; AVX512VL-FAST-PERLANE-NEXT: vmovups 32(%rdi), %xmm1
1843
+ ; AVX512VL-FAST-PERLANE-NEXT: vmovlhps {{.*#+}} xmm2 = xmm0[0],xmm1[0]
1844
+ ; AVX512VL-FAST-PERLANE-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
1845
+ ; AVX512VL-FAST-PERLANE-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
1846
+ ; AVX512VL-FAST-PERLANE-NEXT: retq
1847
+ %src.hi = getelementptr inbounds i8 , ptr %src , i64 32 ; byte-indexed GEP, so the second load starts 32 bytes past %src
1848
+ %lo = load <2 x double >, ptr %src , align 1 ; align 1 - no stronger alignment is promised for the pointer
1849
+ %hi = load <2 x double >, ptr %src.hi , align 1 ; same width and alignment as %lo, read from %src + 32
1850
+ %shuffle = shufflevector <2 x double > %lo , <2 x double > %hi , <4 x i32 > <i32 0 , i32 2 , i32 1 , i32 3 > ; mask indexes %lo as 0-1 and %hi as 2-3, giving lo[0], hi[0], lo[1], hi[1]
1851
+ ret <4 x double > %shuffle
1852
+ }
1853
+
1811
1854
define <4 x double > @bitcast_v4f64_0426 (<4 x double > %a , <4 x double > %b ) {
1812
1855
; ALL-LABEL: bitcast_v4f64_0426:
1813
1856
; ALL: # %bb.0:
0 commit comments