Skip to content

Commit 1aacd31

Browse files
authored
Revert "[X86] combineBROADCAST_LOAD - merge across chains (#128209)"
This reverts commit e21a173.
1 parent bac6e7b commit 1aacd31

File tree

3 files changed

+155
-128
lines changed

3 files changed

+155
-128
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 10 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -59360,14 +59360,21 @@ static SDValue combineFP_EXTEND(SDNode *N, SelectionDAG &DAG,
5936059360
return DAG.getNode(ISD::FP_EXTEND, dl, VT, Cvt);
5936159361
}
5936259362

59363-
// Try to find a larger VBROADCAST_LOAD/SUBV_BROADCAST_LOAD that we can extract.
59363+
// Try to find a larger VBROADCAST_LOAD/SUBV_BROADCAST_LOAD that we can extract
59364+
// from. Limit this to cases where the loads have the same input chain and the
59365+
// output chains are unused. This avoids any memory ordering issues.
5936459366
static SDValue combineBROADCAST_LOAD(SDNode *N, SelectionDAG &DAG,
5936559367
TargetLowering::DAGCombinerInfo &DCI) {
5936659368
assert((N->getOpcode() == X86ISD::VBROADCAST_LOAD ||
5936759369
N->getOpcode() == X86ISD::SUBV_BROADCAST_LOAD) &&
5936859370
"Unknown broadcast load type");
5936959371

59372+
// Only do this if the chain result is unused.
59373+
if (N->hasAnyUseOfValue(1))
59374+
return SDValue();
59375+
5937059376
auto *MemIntrin = cast<MemIntrinsicSDNode>(N);
59377+
5937159378
SDValue Ptr = MemIntrin->getBasePtr();
5937259379
SDValue Chain = MemIntrin->getChain();
5937359380
EVT VT = N->getSimpleValueType(0);
@@ -59381,15 +59388,12 @@ static SDValue combineBROADCAST_LOAD(SDNode *N, SelectionDAG &DAG,
5938159388
cast<MemIntrinsicSDNode>(User)->getChain() == Chain &&
5938259389
cast<MemIntrinsicSDNode>(User)->getMemoryVT().getSizeInBits() ==
5938359390
MemVT.getSizeInBits() &&
59391+
!User->hasAnyUseOfValue(1) &&
5938459392
User->getValueSizeInBits(0).getFixedValue() > VT.getFixedSizeInBits()) {
59385-
assert(cast<MemIntrinsicSDNode>(User)->isSimple() &&
59386-
MemIntrin->isSimple() && "Illegal broadcast load type");
5938759393
SDValue Extract = extractSubVector(SDValue(User, 0), 0, DAG, SDLoc(N),
5938859394
VT.getSizeInBits());
5938959395
Extract = DAG.getBitcast(VT, Extract);
59390-
Extract = DCI.CombineTo(N, Extract, SDValue(User, 1));
59391-
DAG.makeEquivalentMemoryOrdering(SDValue(N, 1), Extract.getValue(1));
59392-
return Extract;
59396+
return DCI.CombineTo(N, Extract, SDValue(User, 1));
5939359397
}
5939459398

5939559399
return SDValue();

llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll

Lines changed: 105 additions & 88 deletions
Original file line number | Diff line number | Diff line change
@@ -1888,14 +1888,15 @@ define void @vec384_i8_widen_to_i16_factor2_broadcast_to_v24i16_factor24(ptr %in
18881888
;
18891889
; AVX2-LABEL: vec384_i8_widen_to_i16_factor2_broadcast_to_v24i16_factor24:
18901890
; AVX2: # %bb.0:
1891-
; AVX2-NEXT: vmovdqa 48(%rdi), %xmm0
1892-
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u]
1893-
; AVX2-NEXT: vpbroadcastb (%rdi), %ymm1
1894-
; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
1895-
; AVX2-NEXT: vpaddb (%rsi), %ymm0, %ymm0
1896-
; AVX2-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
1897-
; AVX2-NEXT: vmovdqa %ymm1, 32(%rdx)
1898-
; AVX2-NEXT: vmovdqa %ymm0, (%rdx)
1891+
; AVX2-NEXT: vpbroadcastb (%rdi), %xmm0
1892+
; AVX2-NEXT: vmovdqa 48(%rdi), %xmm1
1893+
; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u]
1894+
; AVX2-NEXT: vpbroadcastb (%rdi), %ymm2
1895+
; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm2[0],ymm1[0],ymm2[1],ymm1[1],ymm2[2],ymm1[2],ymm2[3],ymm1[3],ymm2[4],ymm1[4],ymm2[5],ymm1[5],ymm2[6],ymm1[6],ymm2[7],ymm1[7],ymm2[16],ymm1[16],ymm2[17],ymm1[17],ymm2[18],ymm1[18],ymm2[19],ymm1[19],ymm2[20],ymm1[20],ymm2[21],ymm1[21],ymm2[22],ymm1[22],ymm2[23],ymm1[23]
1896+
; AVX2-NEXT: vpaddb (%rsi), %ymm1, %ymm1
1897+
; AVX2-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
1898+
; AVX2-NEXT: vmovdqa %ymm0, 32(%rdx)
1899+
; AVX2-NEXT: vmovdqa %ymm1, (%rdx)
18991900
; AVX2-NEXT: vzeroupper
19001901
; AVX2-NEXT: retq
19011902
;
@@ -2111,14 +2112,15 @@ define void @vec384_i8_widen_to_i32_factor4_broadcast_to_v12i32_factor12(ptr %in
21112112
;
21122113
; AVX2-LABEL: vec384_i8_widen_to_i32_factor4_broadcast_to_v12i32_factor12:
21132114
; AVX2: # %bb.0:
2114-
; AVX2-NEXT: vmovdqa 48(%rdi), %xmm0
2115-
; AVX2-NEXT: vpbroadcastb (%rdi), %ymm1
2116-
; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255]
2117-
; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
2118-
; AVX2-NEXT: vpaddb (%rsi), %ymm0, %ymm0
2119-
; AVX2-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
2120-
; AVX2-NEXT: vmovdqa %ymm1, 32(%rdx)
2121-
; AVX2-NEXT: vmovdqa %ymm0, (%rdx)
2115+
; AVX2-NEXT: vpbroadcastb (%rdi), %xmm0
2116+
; AVX2-NEXT: vmovdqa 48(%rdi), %xmm1
2117+
; AVX2-NEXT: vpbroadcastb (%rdi), %ymm2
2118+
; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm3 = [0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255]
2119+
; AVX2-NEXT: vpblendvb %ymm3, %ymm1, %ymm2, %ymm1
2120+
; AVX2-NEXT: vpaddb (%rsi), %ymm1, %ymm1
2121+
; AVX2-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
2122+
; AVX2-NEXT: vmovdqa %ymm0, 32(%rdx)
2123+
; AVX2-NEXT: vmovdqa %ymm1, (%rdx)
21222124
; AVX2-NEXT: vzeroupper
21232125
; AVX2-NEXT: retq
21242126
;
@@ -2235,29 +2237,33 @@ define void @vec384_i8_widen_to_i48_factor6_broadcast_to_v8i48_factor8(ptr %in.e
22352237
;
22362238
; AVX512F-LABEL: vec384_i8_widen_to_i48_factor6_broadcast_to_v8i48_factor8:
22372239
; AVX512F: # %bb.0:
2238-
; AVX512F-NEXT: vmovdqa (%rdi), %xmm0
2239-
; AVX512F-NEXT: vpalignr {{.*#+}} xmm0 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
2240-
; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,15,6,7,8,9,10,15,12,13,14]
2241-
; AVX512F-NEXT: vpbroadcastb (%rdi), %ymm1
2242-
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2243-
; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
2244-
; AVX512F-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
2245-
; AVX512F-NEXT: vmovdqa %ymm1, 32(%rdx)
2246-
; AVX512F-NEXT: vmovdqa %ymm0, (%rdx)
2240+
; AVX512F-NEXT: vpbroadcastb (%rdi), %ymm0
2241+
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
2242+
; AVX512F-NEXT: vmovdqa (%rdi), %xmm1
2243+
; AVX512F-NEXT: vpalignr {{.*#+}} xmm1 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0]
2244+
; AVX512F-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,15,6,7,8,9,10,15,12,13,14]
2245+
; AVX512F-NEXT: vpbroadcastb (%rdi), %xmm2
2246+
; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
2247+
; AVX512F-NEXT: vpaddb (%rsi), %ymm1, %ymm1
2248+
; AVX512F-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
2249+
; AVX512F-NEXT: vmovdqa %ymm0, 32(%rdx)
2250+
; AVX512F-NEXT: vmovdqa %ymm1, (%rdx)
22472251
; AVX512F-NEXT: vzeroupper
22482252
; AVX512F-NEXT: retq
22492253
;
22502254
; AVX512DQ-LABEL: vec384_i8_widen_to_i48_factor6_broadcast_to_v8i48_factor8:
22512255
; AVX512DQ: # %bb.0:
2252-
; AVX512DQ-NEXT: vmovdqa (%rdi), %xmm0
2253-
; AVX512DQ-NEXT: vpalignr {{.*#+}} xmm0 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
2254-
; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,15,6,7,8,9,10,15,12,13,14]
2255-
; AVX512DQ-NEXT: vpbroadcastb (%rdi), %ymm1
2256-
; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2257-
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0
2258-
; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
2259-
; AVX512DQ-NEXT: vmovdqa %ymm1, 32(%rdx)
2260-
; AVX512DQ-NEXT: vmovdqa %ymm0, (%rdx)
2256+
; AVX512DQ-NEXT: vpbroadcastb (%rdi), %ymm0
2257+
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
2258+
; AVX512DQ-NEXT: vmovdqa (%rdi), %xmm1
2259+
; AVX512DQ-NEXT: vpalignr {{.*#+}} xmm1 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0]
2260+
; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,15,6,7,8,9,10,15,12,13,14]
2261+
; AVX512DQ-NEXT: vpbroadcastb (%rdi), %xmm2
2262+
; AVX512DQ-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
2263+
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm1, %ymm1
2264+
; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
2265+
; AVX512DQ-NEXT: vmovdqa %ymm0, 32(%rdx)
2266+
; AVX512DQ-NEXT: vmovdqa %ymm1, (%rdx)
22612267
; AVX512DQ-NEXT: vzeroupper
22622268
; AVX512DQ-NEXT: retq
22632269
;
@@ -2266,8 +2272,9 @@ define void @vec384_i8_widen_to_i48_factor6_broadcast_to_v8i48_factor8(ptr %in.e
22662272
; AVX512BW-NEXT: vmovdqa (%rdi), %xmm0
22672273
; AVX512BW-NEXT: vpalignr {{.*#+}} xmm0 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
22682274
; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,15,6,7,8,9,10,15,12,13,14]
2269-
; AVX512BW-NEXT: vpbroadcastb (%rdi), %ymm1
2275+
; AVX512BW-NEXT: vpbroadcastb (%rdi), %xmm1
22702276
; AVX512BW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2277+
; AVX512BW-NEXT: vpbroadcastb (%rdi), %ymm1
22712278
; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
22722279
; AVX512BW-NEXT: vpaddb (%rsi), %zmm0, %zmm0
22732280
; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rdx)
@@ -2332,14 +2339,15 @@ define void @vec384_i8_widen_to_i64_factor8_broadcast_to_v6i64_factor6(ptr %in.e
23322339
;
23332340
; AVX2-LABEL: vec384_i8_widen_to_i64_factor8_broadcast_to_v6i64_factor6:
23342341
; AVX2: # %bb.0:
2335-
; AVX2-NEXT: vmovdqa 48(%rdi), %xmm0
2336-
; AVX2-NEXT: vpbroadcastb (%rdi), %ymm1
2337-
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [0,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255]
2338-
; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
2339-
; AVX2-NEXT: vpaddb (%rsi), %ymm0, %ymm0
2340-
; AVX2-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
2341-
; AVX2-NEXT: vmovdqa %ymm1, 32(%rdx)
2342-
; AVX2-NEXT: vmovdqa %ymm0, (%rdx)
2342+
; AVX2-NEXT: vpbroadcastb (%rdi), %xmm0
2343+
; AVX2-NEXT: vmovdqa 48(%rdi), %xmm1
2344+
; AVX2-NEXT: vpbroadcastb (%rdi), %ymm2
2345+
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [0,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255]
2346+
; AVX2-NEXT: vpblendvb %ymm3, %ymm1, %ymm2, %ymm1
2347+
; AVX2-NEXT: vpaddb (%rsi), %ymm1, %ymm1
2348+
; AVX2-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
2349+
; AVX2-NEXT: vmovdqa %ymm0, 32(%rdx)
2350+
; AVX2-NEXT: vmovdqa %ymm1, (%rdx)
23432351
; AVX2-NEXT: vzeroupper
23442352
; AVX2-NEXT: retq
23452353
;
@@ -2454,29 +2462,33 @@ define void @vec384_i8_widen_to_i96_factor12_broadcast_to_v4i96_factor4(ptr %in.
24542462
;
24552463
; AVX512F-LABEL: vec384_i8_widen_to_i96_factor12_broadcast_to_v4i96_factor4:
24562464
; AVX512F: # %bb.0:
2457-
; AVX512F-NEXT: vmovdqa (%rdi), %xmm0
2458-
; AVX512F-NEXT: vpalignr {{.*#+}} xmm0 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
2459-
; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,15,12,13,14]
2460-
; AVX512F-NEXT: vpbroadcastb (%rdi), %ymm1
2461-
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2462-
; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
2463-
; AVX512F-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
2464-
; AVX512F-NEXT: vmovdqa %ymm1, 32(%rdx)
2465-
; AVX512F-NEXT: vmovdqa %ymm0, (%rdx)
2465+
; AVX512F-NEXT: vpbroadcastb (%rdi), %ymm0
2466+
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
2467+
; AVX512F-NEXT: vmovdqa (%rdi), %xmm1
2468+
; AVX512F-NEXT: vpalignr {{.*#+}} xmm1 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0]
2469+
; AVX512F-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,5,6,7,8,9,10,15,12,13,14]
2470+
; AVX512F-NEXT: vpbroadcastb (%rdi), %xmm2
2471+
; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
2472+
; AVX512F-NEXT: vpaddb (%rsi), %ymm1, %ymm1
2473+
; AVX512F-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
2474+
; AVX512F-NEXT: vmovdqa %ymm0, 32(%rdx)
2475+
; AVX512F-NEXT: vmovdqa %ymm1, (%rdx)
24662476
; AVX512F-NEXT: vzeroupper
24672477
; AVX512F-NEXT: retq
24682478
;
24692479
; AVX512DQ-LABEL: vec384_i8_widen_to_i96_factor12_broadcast_to_v4i96_factor4:
24702480
; AVX512DQ: # %bb.0:
2471-
; AVX512DQ-NEXT: vmovdqa (%rdi), %xmm0
2472-
; AVX512DQ-NEXT: vpalignr {{.*#+}} xmm0 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
2473-
; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,15,12,13,14]
2474-
; AVX512DQ-NEXT: vpbroadcastb (%rdi), %ymm1
2475-
; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2476-
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0
2477-
; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
2478-
; AVX512DQ-NEXT: vmovdqa %ymm1, 32(%rdx)
2479-
; AVX512DQ-NEXT: vmovdqa %ymm0, (%rdx)
2481+
; AVX512DQ-NEXT: vpbroadcastb (%rdi), %ymm0
2482+
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
2483+
; AVX512DQ-NEXT: vmovdqa (%rdi), %xmm1
2484+
; AVX512DQ-NEXT: vpalignr {{.*#+}} xmm1 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0]
2485+
; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,5,6,7,8,9,10,15,12,13,14]
2486+
; AVX512DQ-NEXT: vpbroadcastb (%rdi), %xmm2
2487+
; AVX512DQ-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
2488+
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm1, %ymm1
2489+
; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
2490+
; AVX512DQ-NEXT: vmovdqa %ymm0, 32(%rdx)
2491+
; AVX512DQ-NEXT: vmovdqa %ymm1, (%rdx)
24802492
; AVX512DQ-NEXT: vzeroupper
24812493
; AVX512DQ-NEXT: retq
24822494
;
@@ -2485,8 +2497,9 @@ define void @vec384_i8_widen_to_i96_factor12_broadcast_to_v4i96_factor4(ptr %in.
24852497
; AVX512BW-NEXT: vmovdqa (%rdi), %xmm0
24862498
; AVX512BW-NEXT: vpalignr {{.*#+}} xmm0 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
24872499
; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,15,12,13,14]
2488-
; AVX512BW-NEXT: vpbroadcastb (%rdi), %ymm1
2500+
; AVX512BW-NEXT: vpbroadcastb (%rdi), %xmm1
24892501
; AVX512BW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2502+
; AVX512BW-NEXT: vpbroadcastb (%rdi), %ymm1
24902503
; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
24912504
; AVX512BW-NEXT: vpaddb (%rsi), %zmm0, %zmm0
24922505
; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rdx)
@@ -2775,13 +2788,14 @@ define void @vec384_i16_widen_to_i32_factor2_broadcast_to_v12i32_factor12(ptr %i
27752788
;
27762789
; AVX2-LABEL: vec384_i16_widen_to_i32_factor2_broadcast_to_v12i32_factor12:
27772790
; AVX2: # %bb.0:
2778-
; AVX2-NEXT: vmovdqa 48(%rdi), %xmm0
2779-
; AVX2-NEXT: vpbroadcastw (%rdi), %ymm1
2780-
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7],ymm1[8],ymm0[9],ymm1[10],ymm0[11],ymm1[12],ymm0[13],ymm1[14],ymm0[15]
2781-
; AVX2-NEXT: vpaddb (%rsi), %ymm0, %ymm0
2782-
; AVX2-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
2783-
; AVX2-NEXT: vmovdqa %ymm1, 32(%rdx)
2784-
; AVX2-NEXT: vmovdqa %ymm0, (%rdx)
2791+
; AVX2-NEXT: vpbroadcastw (%rdi), %xmm0
2792+
; AVX2-NEXT: vmovdqa 48(%rdi), %xmm1
2793+
; AVX2-NEXT: vpbroadcastw (%rdi), %ymm2
2794+
; AVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm2[0],ymm1[1],ymm2[2],ymm1[3],ymm2[4],ymm1[5],ymm2[6],ymm1[7],ymm2[8],ymm1[9],ymm2[10],ymm1[11],ymm2[12],ymm1[13],ymm2[14],ymm1[15]
2795+
; AVX2-NEXT: vpaddb (%rsi), %ymm1, %ymm1
2796+
; AVX2-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
2797+
; AVX2-NEXT: vmovdqa %ymm0, 32(%rdx)
2798+
; AVX2-NEXT: vmovdqa %ymm1, (%rdx)
27852799
; AVX2-NEXT: vzeroupper
27862800
; AVX2-NEXT: retq
27872801
;
@@ -2976,13 +2990,14 @@ define void @vec384_i16_widen_to_i64_factor4_broadcast_to_v6i64_factor6(ptr %in.
29762990
;
29772991
; AVX2-LABEL: vec384_i16_widen_to_i64_factor4_broadcast_to_v6i64_factor6:
29782992
; AVX2: # %bb.0:
2979-
; AVX2-NEXT: vmovdqa 48(%rdi), %xmm0
2980-
; AVX2-NEXT: vpbroadcastw (%rdi), %ymm1
2981-
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3],ymm1[4],ymm0[5,6,7],ymm1[8],ymm0[9,10,11],ymm1[12],ymm0[13,14,15]
2982-
; AVX2-NEXT: vpaddb (%rsi), %ymm0, %ymm0
2983-
; AVX2-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
2984-
; AVX2-NEXT: vmovdqa %ymm1, 32(%rdx)
2985-
; AVX2-NEXT: vmovdqa %ymm0, (%rdx)
2993+
; AVX2-NEXT: vpbroadcastw (%rdi), %xmm0
2994+
; AVX2-NEXT: vmovdqa 48(%rdi), %xmm1
2995+
; AVX2-NEXT: vpbroadcastw (%rdi), %ymm2
2996+
; AVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm2[0],ymm1[1,2,3],ymm2[4],ymm1[5,6,7],ymm2[8],ymm1[9,10,11],ymm2[12],ymm1[13,14,15]
2997+
; AVX2-NEXT: vpaddb (%rsi), %ymm1, %ymm1
2998+
; AVX2-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
2999+
; AVX2-NEXT: vmovdqa %ymm0, 32(%rdx)
3000+
; AVX2-NEXT: vmovdqa %ymm1, (%rdx)
29863001
; AVX2-NEXT: vzeroupper
29873002
; AVX2-NEXT: retq
29883003
;
@@ -3093,25 +3108,27 @@ define void @vec384_i16_widen_to_i96_factor6_broadcast_to_v4i96_factor4(ptr %in.
30933108
;
30943109
; AVX512F-LABEL: vec384_i16_widen_to_i96_factor6_broadcast_to_v4i96_factor4:
30953110
; AVX512F: # %bb.0:
3096-
; AVX512F-NEXT: vpbroadcastw (%rdi), %ymm0
3111+
; AVX512F-NEXT: vpbroadcastw (%rdi), %xmm0
30973112
; AVX512F-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1,2,3,4,5],xmm0[6],mem[7]
3098-
; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
3099-
; AVX512F-NEXT: vpaddb (%rsi), %ymm1, %ymm1
3100-
; AVX512F-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
3101-
; AVX512F-NEXT: vmovdqa %ymm0, 32(%rdx)
3102-
; AVX512F-NEXT: vmovdqa %ymm1, (%rdx)
3113+
; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
3114+
; AVX512F-NEXT: vpbroadcastw (%rdi), %ymm1
3115+
; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
3116+
; AVX512F-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
3117+
; AVX512F-NEXT: vmovdqa %ymm1, 32(%rdx)
3118+
; AVX512F-NEXT: vmovdqa %ymm0, (%rdx)
31033119
; AVX512F-NEXT: vzeroupper
31043120
; AVX512F-NEXT: retq
31053121
;
31063122
; AVX512DQ-LABEL: vec384_i16_widen_to_i96_factor6_broadcast_to_v4i96_factor4:
31073123
; AVX512DQ: # %bb.0:
3108-
; AVX512DQ-NEXT: vpbroadcastw (%rdi), %ymm0
3124+
; AVX512DQ-NEXT: vpbroadcastw (%rdi), %xmm0
31093125
; AVX512DQ-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1,2,3,4,5],xmm0[6],mem[7]
3110-
; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
3111-
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm1, %ymm1
3112-
; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
3113-
; AVX512DQ-NEXT: vmovdqa %ymm0, 32(%rdx)
3114-
; AVX512DQ-NEXT: vmovdqa %ymm1, (%rdx)
3126+
; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
3127+
; AVX512DQ-NEXT: vpbroadcastw (%rdi), %ymm1
3128+
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0
3129+
; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
3130+
; AVX512DQ-NEXT: vmovdqa %ymm1, 32(%rdx)
3131+
; AVX512DQ-NEXT: vmovdqa %ymm0, (%rdx)
31153132
; AVX512DQ-NEXT: vzeroupper
31163133
; AVX512DQ-NEXT: retq
31173134
;

0 commit comments

Comments
 (0)