Skip to content

Commit 884b79a

Browse files
authored
[X86] Relax vbroadcast(vector load X) -> vbroadcast_load(X) to all types (#128039)
There's no need for an AVX1-only 32/64-bit scalar size limit - if the X86ISD::VBROADCAST node type is supported, X86ISD::VBROADCAST_LOAD will be as well.
1 parent 917ed99 commit 884b79a

File tree

3 files changed

+93
-128
lines changed

3 files changed

+93
-128
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42088,9 +42088,7 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
4208842088
}
4208942089

4209042090
// vbroadcast(vector load X) -> vbroadcast_load
42091-
if ((SrcVT == MVT::v2f64 || SrcVT == MVT::v4f32 || SrcVT == MVT::v2i64 ||
42092-
SrcVT == MVT::v4i32) &&
42093-
Src.hasOneUse() && ISD::isNormalLoad(Src.getNode())) {
42091+
if (Src.hasOneUse() && ISD::isNormalLoad(Src.getNode())) {
4209442092
LoadSDNode *LN = cast<LoadSDNode>(Src);
4209542093
// Unless the load is volatile or atomic.
4209642094
if (LN->isSimple()) {

llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll

Lines changed: 59 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -910,8 +910,8 @@ define void @vec256_i8_widen_to_i16_factor2_broadcast_to_v16i16_factor16(ptr %in
910910
; AVX2-LABEL: vec256_i8_widen_to_i16_factor2_broadcast_to_v16i16_factor16:
911911
; AVX2: # %bb.0:
912912
; AVX2-NEXT: vmovdqa 32(%rdi), %ymm0
913-
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u,17,19,21,23,25,27,29,31,u,u,u,u,u,u,u,u]
914913
; AVX2-NEXT: vpbroadcastb (%rdi), %ymm1
914+
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u,17,19,21,23,25,27,29,31,u,u,u,u,u,u,u,u]
915915
; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
916916
; AVX2-NEXT: vpaddb (%rsi), %ymm0, %ymm0
917917
; AVX2-NEXT: vmovdqa %ymm0, (%rdx)
@@ -920,32 +920,32 @@ define void @vec256_i8_widen_to_i16_factor2_broadcast_to_v16i16_factor16(ptr %in
920920
;
921921
; AVX512F-LABEL: vec256_i8_widen_to_i16_factor2_broadcast_to_v16i16_factor16:
922922
; AVX512F: # %bb.0:
923-
; AVX512F-NEXT: vmovdqa 32(%rdi), %ymm0
924-
; AVX512F-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u,17,19,21,23,25,27,29,31,u,u,u,u,u,u,u,u]
925-
; AVX512F-NEXT: vpbroadcastb (%rdi), %ymm1
926-
; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
923+
; AVX512F-NEXT: vpbroadcastb (%rdi), %ymm0
924+
; AVX512F-NEXT: vmovdqa 32(%rdi), %ymm1
925+
; AVX512F-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u,17,19,21,23,25,27,29,31,u,u,u,u,u,u,u,u]
926+
; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
927927
; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
928928
; AVX512F-NEXT: vmovdqa %ymm0, (%rdx)
929929
; AVX512F-NEXT: vzeroupper
930930
; AVX512F-NEXT: retq
931931
;
932932
; AVX512DQ-LABEL: vec256_i8_widen_to_i16_factor2_broadcast_to_v16i16_factor16:
933933
; AVX512DQ: # %bb.0:
934-
; AVX512DQ-NEXT: vmovdqa 32(%rdi), %ymm0
935-
; AVX512DQ-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u,17,19,21,23,25,27,29,31,u,u,u,u,u,u,u,u]
936-
; AVX512DQ-NEXT: vpbroadcastb (%rdi), %ymm1
937-
; AVX512DQ-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
934+
; AVX512DQ-NEXT: vpbroadcastb (%rdi), %ymm0
935+
; AVX512DQ-NEXT: vmovdqa 32(%rdi), %ymm1
936+
; AVX512DQ-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u,17,19,21,23,25,27,29,31,u,u,u,u,u,u,u,u]
937+
; AVX512DQ-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
938938
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0
939939
; AVX512DQ-NEXT: vmovdqa %ymm0, (%rdx)
940940
; AVX512DQ-NEXT: vzeroupper
941941
; AVX512DQ-NEXT: retq
942942
;
943943
; AVX512BW-LABEL: vec256_i8_widen_to_i16_factor2_broadcast_to_v16i16_factor16:
944944
; AVX512BW: # %bb.0:
945-
; AVX512BW-NEXT: vmovdqa 32(%rdi), %ymm0
946-
; AVX512BW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u,17,19,21,23,25,27,29,31,u,u,u,u,u,u,u,u]
947-
; AVX512BW-NEXT: vpbroadcastb (%rdi), %ymm1
948-
; AVX512BW-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
945+
; AVX512BW-NEXT: vpbroadcastb (%rdi), %ymm0
946+
; AVX512BW-NEXT: vmovdqa 32(%rdi), %ymm1
947+
; AVX512BW-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u,17,19,21,23,25,27,29,31,u,u,u,u,u,u,u,u]
948+
; AVX512BW-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
949949
; AVX512BW-NEXT: vpaddb (%rsi), %zmm0, %zmm0
950950
; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rdx)
951951
; AVX512BW-NEXT: vzeroupper
@@ -1906,12 +1906,10 @@ define void @vec384_i8_widen_to_i16_factor2_broadcast_to_v24i16_factor24(ptr %in
19061906
; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u]
19071907
; AVX512F-NEXT: vpbroadcastb (%rdi), %ymm1
19081908
; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
1909-
; AVX512F-NEXT: vpbroadcastb (%rdi), %ymm1
1910-
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm1, %zmm1
1911-
; AVX512F-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
19121909
; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
1913-
; AVX512F-NEXT: vmovdqa %ymm0, (%rdx)
1910+
; AVX512F-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
19141911
; AVX512F-NEXT: vmovdqa %ymm1, 32(%rdx)
1912+
; AVX512F-NEXT: vmovdqa %ymm0, (%rdx)
19151913
; AVX512F-NEXT: vzeroupper
19161914
; AVX512F-NEXT: retq
19171915
;
@@ -1921,12 +1919,10 @@ define void @vec384_i8_widen_to_i16_factor2_broadcast_to_v24i16_factor24(ptr %in
19211919
; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u]
19221920
; AVX512DQ-NEXT: vpbroadcastb (%rdi), %ymm1
19231921
; AVX512DQ-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
1924-
; AVX512DQ-NEXT: vpbroadcastb (%rdi), %ymm1
1925-
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm1, %zmm1
1926-
; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
19271922
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0
1928-
; AVX512DQ-NEXT: vmovdqa %ymm0, (%rdx)
1923+
; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
19291924
; AVX512DQ-NEXT: vmovdqa %ymm1, 32(%rdx)
1925+
; AVX512DQ-NEXT: vmovdqa %ymm0, (%rdx)
19301926
; AVX512DQ-NEXT: vzeroupper
19311927
; AVX512DQ-NEXT: retq
19321928
;
@@ -1936,7 +1932,6 @@ define void @vec384_i8_widen_to_i16_factor2_broadcast_to_v24i16_factor24(ptr %in
19361932
; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u]
19371933
; AVX512BW-NEXT: vpbroadcastb (%rdi), %ymm1
19381934
; AVX512BW-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
1939-
; AVX512BW-NEXT: vpbroadcastb (%rdi), %ymm1
19401935
; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
19411936
; AVX512BW-NEXT: vpaddb (%rsi), %zmm0, %zmm0
19421937
; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rdx)
@@ -2133,37 +2128,33 @@ define void @vec384_i8_widen_to_i32_factor4_broadcast_to_v12i32_factor12(ptr %in
21332128
; AVX512F: # %bb.0:
21342129
; AVX512F-NEXT: vmovdqa 48(%rdi), %xmm0
21352130
; AVX512F-NEXT: vpbroadcastb (%rdi), %ymm1
2136-
; AVX512F-NEXT: vpternlogd {{.*#+}} ymm1 = ymm1 ^ (mem & (ymm1 ^ ymm0))
2137-
; AVX512F-NEXT: vpbroadcastb (%rdi), %ymm0
2138-
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
2139-
; AVX512F-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
2140-
; AVX512F-NEXT: vpaddb (%rsi), %ymm1, %ymm1
2141-
; AVX512F-NEXT: vmovdqa %ymm1, (%rdx)
2142-
; AVX512F-NEXT: vmovdqa %ymm0, 32(%rdx)
2131+
; AVX512F-NEXT: vpternlogd {{.*#+}} ymm0 = ymm1 ^ (mem & (ymm0 ^ ymm1))
2132+
; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
2133+
; AVX512F-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
2134+
; AVX512F-NEXT: vmovdqa %ymm1, 32(%rdx)
2135+
; AVX512F-NEXT: vmovdqa %ymm0, (%rdx)
21432136
; AVX512F-NEXT: vzeroupper
21442137
; AVX512F-NEXT: retq
21452138
;
21462139
; AVX512DQ-LABEL: vec384_i8_widen_to_i32_factor4_broadcast_to_v12i32_factor12:
21472140
; AVX512DQ: # %bb.0:
21482141
; AVX512DQ-NEXT: vmovdqa 48(%rdi), %xmm0
21492142
; AVX512DQ-NEXT: vpbroadcastb (%rdi), %ymm1
2150-
; AVX512DQ-NEXT: vpternlogd {{.*#+}} ymm1 = ymm1 ^ (mem & (ymm1 ^ ymm0))
2151-
; AVX512DQ-NEXT: vpbroadcastb (%rdi), %ymm0
2152-
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
2153-
; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
2154-
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm1, %ymm1
2155-
; AVX512DQ-NEXT: vmovdqa %ymm1, (%rdx)
2156-
; AVX512DQ-NEXT: vmovdqa %ymm0, 32(%rdx)
2143+
; AVX512DQ-NEXT: vpternlogd {{.*#+}} ymm0 = ymm1 ^ (mem & (ymm0 ^ ymm1))
2144+
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0
2145+
; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
2146+
; AVX512DQ-NEXT: vmovdqa %ymm1, 32(%rdx)
2147+
; AVX512DQ-NEXT: vmovdqa %ymm0, (%rdx)
21572148
; AVX512DQ-NEXT: vzeroupper
21582149
; AVX512DQ-NEXT: retq
21592150
;
21602151
; AVX512BW-LABEL: vec384_i8_widen_to_i32_factor4_broadcast_to_v12i32_factor12:
21612152
; AVX512BW: # %bb.0:
21622153
; AVX512BW-NEXT: vmovdqa 48(%rdi), %xmm0
2154+
; AVX512BW-NEXT: vpbroadcastb (%rdi), %ymm1
21632155
; AVX512BW-NEXT: movl $286331153, %eax # imm = 0x11111111
21642156
; AVX512BW-NEXT: kmovd %eax, %k1
2165-
; AVX512BW-NEXT: vpbroadcastb (%rdi), %ymm0 {%k1}
2166-
; AVX512BW-NEXT: vpbroadcastb (%rdi), %ymm1
2157+
; AVX512BW-NEXT: vmovdqu8 %ymm1, %ymm0 {%k1}
21672158
; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
21682159
; AVX512BW-NEXT: vpaddb (%rsi), %zmm0, %zmm0
21692160
; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rdx)
@@ -2364,37 +2355,33 @@ define void @vec384_i8_widen_to_i64_factor8_broadcast_to_v6i64_factor6(ptr %in.e
23642355
; AVX512F: # %bb.0:
23652356
; AVX512F-NEXT: vmovdqa 48(%rdi), %xmm0
23662357
; AVX512F-NEXT: vpbroadcastb (%rdi), %ymm1
2367-
; AVX512F-NEXT: vpternlogq {{.*#+}} ymm1 = ymm1 ^ (mem & (ymm1 ^ ymm0))
2368-
; AVX512F-NEXT: vpbroadcastb (%rdi), %ymm0
2369-
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
2370-
; AVX512F-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
2371-
; AVX512F-NEXT: vpaddb (%rsi), %ymm1, %ymm1
2372-
; AVX512F-NEXT: vmovdqa %ymm1, (%rdx)
2373-
; AVX512F-NEXT: vmovdqa %ymm0, 32(%rdx)
2358+
; AVX512F-NEXT: vpternlogq {{.*#+}} ymm0 = ymm1 ^ (mem & (ymm0 ^ ymm1))
2359+
; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
2360+
; AVX512F-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
2361+
; AVX512F-NEXT: vmovdqa %ymm1, 32(%rdx)
2362+
; AVX512F-NEXT: vmovdqa %ymm0, (%rdx)
23742363
; AVX512F-NEXT: vzeroupper
23752364
; AVX512F-NEXT: retq
23762365
;
23772366
; AVX512DQ-LABEL: vec384_i8_widen_to_i64_factor8_broadcast_to_v6i64_factor6:
23782367
; AVX512DQ: # %bb.0:
23792368
; AVX512DQ-NEXT: vmovdqa 48(%rdi), %xmm0
23802369
; AVX512DQ-NEXT: vpbroadcastb (%rdi), %ymm1
2381-
; AVX512DQ-NEXT: vpternlogq {{.*#+}} ymm1 = ymm1 ^ (mem & (ymm1 ^ ymm0))
2382-
; AVX512DQ-NEXT: vpbroadcastb (%rdi), %ymm0
2383-
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
2384-
; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
2385-
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm1, %ymm1
2386-
; AVX512DQ-NEXT: vmovdqa %ymm1, (%rdx)
2387-
; AVX512DQ-NEXT: vmovdqa %ymm0, 32(%rdx)
2370+
; AVX512DQ-NEXT: vpternlogq {{.*#+}} ymm0 = ymm1 ^ (mem & (ymm0 ^ ymm1))
2371+
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0
2372+
; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
2373+
; AVX512DQ-NEXT: vmovdqa %ymm1, 32(%rdx)
2374+
; AVX512DQ-NEXT: vmovdqa %ymm0, (%rdx)
23882375
; AVX512DQ-NEXT: vzeroupper
23892376
; AVX512DQ-NEXT: retq
23902377
;
23912378
; AVX512BW-LABEL: vec384_i8_widen_to_i64_factor8_broadcast_to_v6i64_factor6:
23922379
; AVX512BW: # %bb.0:
23932380
; AVX512BW-NEXT: vmovdqa 48(%rdi), %xmm0
2381+
; AVX512BW-NEXT: vpbroadcastb (%rdi), %ymm1
23942382
; AVX512BW-NEXT: movl $16843009, %eax # imm = 0x1010101
23952383
; AVX512BW-NEXT: kmovd %eax, %k1
2396-
; AVX512BW-NEXT: vpbroadcastb (%rdi), %ymm0 {%k1}
2397-
; AVX512BW-NEXT: vpbroadcastb (%rdi), %ymm1
2384+
; AVX512BW-NEXT: vmovdqu8 %ymm1, %ymm0 {%k1}
23982385
; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
23992386
; AVX512BW-NEXT: vpaddb (%rsi), %zmm0, %zmm0
24002387
; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rdx)
@@ -2817,7 +2804,6 @@ define void @vec384_i16_widen_to_i32_factor2_broadcast_to_v12i32_factor12(ptr %i
28172804
; AVX512F-NEXT: vmovdqa 48(%rdi), %xmm0
28182805
; AVX512F-NEXT: vpbroadcastw (%rdi), %ymm1
28192806
; AVX512F-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7],ymm1[8],ymm0[9],ymm1[10],ymm0[11],ymm1[12],ymm0[13],ymm1[14],ymm0[15]
2820-
; AVX512F-NEXT: vpbroadcastw (%rdi), %ymm1
28212807
; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
28222808
; AVX512F-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
28232809
; AVX512F-NEXT: vmovdqa %ymm1, 32(%rdx)
@@ -2830,7 +2816,6 @@ define void @vec384_i16_widen_to_i32_factor2_broadcast_to_v12i32_factor12(ptr %i
28302816
; AVX512DQ-NEXT: vmovdqa 48(%rdi), %xmm0
28312817
; AVX512DQ-NEXT: vpbroadcastw (%rdi), %ymm1
28322818
; AVX512DQ-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7],ymm1[8],ymm0[9],ymm1[10],ymm0[11],ymm1[12],ymm0[13],ymm1[14],ymm0[15]
2833-
; AVX512DQ-NEXT: vpbroadcastw (%rdi), %ymm1
28342819
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0
28352820
; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
28362821
; AVX512DQ-NEXT: vmovdqa %ymm1, 32(%rdx)
@@ -2907,8 +2892,7 @@ define void @vec384_i16_widen_to_i48_factor3_broadcast_to_v8i48_factor8(ptr %in.
29072892
; AVX2-LABEL: vec384_i16_widen_to_i48_factor3_broadcast_to_v8i48_factor8:
29082893
; AVX2: # %bb.0:
29092894
; AVX2-NEXT: vpbroadcastw (%rdi), %xmm0
2910-
; AVX2-NEXT: vpbroadcastw (%rdi), %xmm1
2911-
; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],mem[1,2],xmm1[3],mem[4,5],xmm1[6],mem[7]
2895+
; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1,2],xmm0[3],mem[4,5],xmm0[6],mem[7]
29122896
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
29132897
; AVX2-NEXT: vpaddb (%rsi), %ymm1, %ymm1
29142898
; AVX2-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
@@ -2920,26 +2904,24 @@ define void @vec384_i16_widen_to_i48_factor3_broadcast_to_v8i48_factor8(ptr %in.
29202904
; AVX512F-LABEL: vec384_i16_widen_to_i48_factor3_broadcast_to_v8i48_factor8:
29212905
; AVX512F: # %bb.0:
29222906
; AVX512F-NEXT: vpbroadcastw (%rdi), %xmm0
2923-
; AVX512F-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1,2],xmm0[3],mem[4,5],xmm0[6],mem[7]
2924-
; AVX512F-NEXT: vpbroadcastw (%rdi), %xmm1
2925-
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2926-
; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
2927-
; AVX512F-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
2928-
; AVX512F-NEXT: vmovdqa %ymm1, 32(%rdx)
2929-
; AVX512F-NEXT: vmovdqa %ymm0, (%rdx)
2907+
; AVX512F-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1,2],xmm0[3],mem[4,5],xmm0[6],mem[7]
2908+
; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
2909+
; AVX512F-NEXT: vpaddb (%rsi), %ymm1, %ymm1
2910+
; AVX512F-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
2911+
; AVX512F-NEXT: vmovdqa %ymm0, 32(%rdx)
2912+
; AVX512F-NEXT: vmovdqa %ymm1, (%rdx)
29302913
; AVX512F-NEXT: vzeroupper
29312914
; AVX512F-NEXT: retq
29322915
;
29332916
; AVX512DQ-LABEL: vec384_i16_widen_to_i48_factor3_broadcast_to_v8i48_factor8:
29342917
; AVX512DQ: # %bb.0:
29352918
; AVX512DQ-NEXT: vpbroadcastw (%rdi), %xmm0
2936-
; AVX512DQ-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1,2],xmm0[3],mem[4,5],xmm0[6],mem[7]
2937-
; AVX512DQ-NEXT: vpbroadcastw (%rdi), %xmm1
2938-
; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2939-
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0
2940-
; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
2941-
; AVX512DQ-NEXT: vmovdqa %ymm1, 32(%rdx)
2942-
; AVX512DQ-NEXT: vmovdqa %ymm0, (%rdx)
2919+
; AVX512DQ-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1,2],xmm0[3],mem[4,5],xmm0[6],mem[7]
2920+
; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
2921+
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm1, %ymm1
2922+
; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
2923+
; AVX512DQ-NEXT: vmovdqa %ymm0, 32(%rdx)
2924+
; AVX512DQ-NEXT: vmovdqa %ymm1, (%rdx)
29432925
; AVX512DQ-NEXT: vzeroupper
29442926
; AVX512DQ-NEXT: retq
29452927
;
@@ -3024,7 +3006,6 @@ define void @vec384_i16_widen_to_i64_factor4_broadcast_to_v6i64_factor6(ptr %in.
30243006
; AVX512F-NEXT: vmovdqa 48(%rdi), %xmm0
30253007
; AVX512F-NEXT: vpbroadcastw (%rdi), %ymm1
30263008
; AVX512F-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3],ymm1[4],ymm0[5,6,7],ymm1[8],ymm0[9,10,11],ymm1[12],ymm0[13,14,15]
3027-
; AVX512F-NEXT: vpbroadcastw (%rdi), %ymm1
30283009
; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
30293010
; AVX512F-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
30303011
; AVX512F-NEXT: vmovdqa %ymm1, 32(%rdx)
@@ -3037,7 +3018,6 @@ define void @vec384_i16_widen_to_i64_factor4_broadcast_to_v6i64_factor6(ptr %in.
30373018
; AVX512DQ-NEXT: vmovdqa 48(%rdi), %xmm0
30383019
; AVX512DQ-NEXT: vpbroadcastw (%rdi), %ymm1
30393020
; AVX512DQ-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3],ymm1[4],ymm0[5,6,7],ymm1[8],ymm0[9,10,11],ymm1[12],ymm0[13,14,15]
3040-
; AVX512DQ-NEXT: vpbroadcastw (%rdi), %ymm1
30413021
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0
30423022
; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
30433023
; AVX512DQ-NEXT: vmovdqa %ymm1, 32(%rdx)
@@ -3117,8 +3097,7 @@ define void @vec384_i16_widen_to_i96_factor6_broadcast_to_v4i96_factor4(ptr %in.
31173097
; AVX2-LABEL: vec384_i16_widen_to_i96_factor6_broadcast_to_v4i96_factor4:
31183098
; AVX2: # %bb.0:
31193099
; AVX2-NEXT: vpbroadcastw (%rdi), %xmm0
3120-
; AVX2-NEXT: vpbroadcastw (%rdi), %xmm1
3121-
; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],mem[1,2,3,4,5],xmm1[6],mem[7]
3100+
; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1,2,3,4,5],xmm0[6],mem[7]
31223101
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
31233102
; AVX2-NEXT: vpaddb (%rsi), %ymm1, %ymm1
31243103
; AVX2-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
@@ -3130,9 +3109,8 @@ define void @vec384_i16_widen_to_i96_factor6_broadcast_to_v4i96_factor4(ptr %in.
31303109
; AVX512F-LABEL: vec384_i16_widen_to_i96_factor6_broadcast_to_v4i96_factor4:
31313110
; AVX512F: # %bb.0:
31323111
; AVX512F-NEXT: vpbroadcastw (%rdi), %xmm0
3133-
; AVX512F-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1,2,3,4,5],xmm0[6],mem[7]
3134-
; AVX512F-NEXT: vpbroadcastw (%rdi), %xmm1
3135-
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
3112+
; AVX512F-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1,2,3,4,5],xmm0[6],mem[7]
3113+
; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
31363114
; AVX512F-NEXT: vpbroadcastw (%rdi), %ymm1
31373115
; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
31383116
; AVX512F-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
@@ -3144,9 +3122,8 @@ define void @vec384_i16_widen_to_i96_factor6_broadcast_to_v4i96_factor4(ptr %in.
31443122
; AVX512DQ-LABEL: vec384_i16_widen_to_i96_factor6_broadcast_to_v4i96_factor4:
31453123
; AVX512DQ: # %bb.0:
31463124
; AVX512DQ-NEXT: vpbroadcastw (%rdi), %xmm0
3147-
; AVX512DQ-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1,2,3,4,5],xmm0[6],mem[7]
3148-
; AVX512DQ-NEXT: vpbroadcastw (%rdi), %xmm1
3149-
; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
3125+
; AVX512DQ-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1,2,3,4,5],xmm0[6],mem[7]
3126+
; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
31503127
; AVX512DQ-NEXT: vpbroadcastw (%rdi), %ymm1
31513128
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0
31523129
; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1

0 commit comments

Comments
 (0)