Skip to content

Commit 5e1fba4

Browse files
authored
Revert "[X86] combineINSERT_SUBVECTOR - fold insert_subvector(base,extract_subvector(broadcast)) -> blend shuffle(base,broadcast)" (#133340)
Reverts #133083. This causes BuildBot failures and timeouts in some of our internal tests (3 min => over 5 min).
1 parent e9dc051 commit 5e1fba4

File tree

4 files changed

+30
-41
lines changed

4 files changed

+30
-41
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 5 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -58823,8 +58823,6 @@ static SDValue combineINSERT_SUBVECTOR(SDNode *N, SelectionDAG &DAG,
5882358823

5882458824
uint64_t IdxVal = N->getConstantOperandVal(2);
5882558825
MVT SubVecVT = SubVec.getSimpleValueType();
58826-
int VecNumElts = OpVT.getVectorNumElements();
58827-
int SubVecNumElts = SubVecVT.getVectorNumElements();
5882858826

5882958827
if (Vec.isUndef() && SubVec.isUndef())
5883058828
return DAG.getUNDEF(OpVT);
@@ -58884,34 +58882,19 @@ static SDValue combineINSERT_SUBVECTOR(SDNode *N, SelectionDAG &DAG,
5888458882
SubVec.getOperand(0).getSimpleValueType() == OpVT &&
5888558883
(IdxVal != 0 ||
5888658884
!(Vec.isUndef() || ISD::isBuildVectorAllZeros(Vec.getNode())))) {
58887-
SDValue ExtSrc = SubVec.getOperand(0);
5888858885
int ExtIdxVal = SubVec.getConstantOperandVal(1);
5888958886
if (ExtIdxVal != 0) {
58887+
int VecNumElts = OpVT.getVectorNumElements();
58888+
int SubVecNumElts = SubVecVT.getVectorNumElements();
5889058889
SmallVector<int, 64> Mask(VecNumElts);
5889158890
// First create an identity shuffle mask.
5889258891
for (int i = 0; i != VecNumElts; ++i)
5889358892
Mask[i] = i;
5889458893
// Now insert the extracted portion.
5889558894
for (int i = 0; i != SubVecNumElts; ++i)
5889658895
Mask[i + IdxVal] = i + ExtIdxVal + VecNumElts;
58897-
return DAG.getVectorShuffle(OpVT, dl, Vec, ExtSrc, Mask);
58898-
}
58899-
// If we're broadcasting, see if we can use a blend instead of
58900-
// extract/insert pair. For subvector broadcasts, we must ensure that the
58901-
// subvector is aligned with the insertion/extractions.
58902-
if (ExtSrc.getOpcode() == X86ISD::VBROADCAST ||
58903-
ExtSrc.getOpcode() == X86ISD::VBROADCAST_LOAD ||
58904-
(ExtSrc.getOpcode() == X86ISD::SUBV_BROADCAST_LOAD &&
58905-
(ExtIdxVal % SubVecNumElts) == 0 && (IdxVal % SubVecNumElts) == 0 &&
58906-
cast<MemIntrinsicSDNode>(ExtSrc)->getMemoryVT() == SubVecVT)) {
58907-
SmallVector<int, 64> Mask(VecNumElts);
58908-
// First create an identity shuffle mask.
58909-
for (int i = 0; i != VecNumElts; ++i)
58910-
Mask[i] = i;
58911-
// Now blend the broadcast.
58912-
for (int i = 0; i != SubVecNumElts; ++i)
58913-
Mask[i + IdxVal] = i + IdxVal + VecNumElts;
58914-
return DAG.getVectorShuffle(OpVT, dl, Vec, ExtSrc, Mask);
58896+
58897+
return DAG.getVectorShuffle(OpVT, dl, Vec, SubVec.getOperand(0), Mask);
5891558898
}
5891658899
}
5891758900

@@ -58959,7 +58942,7 @@ static SDValue combineINSERT_SUBVECTOR(SDNode *N, SelectionDAG &DAG,
5895958942
// If we're splatting the lower half subvector of a full vector load into the
5896058943
// upper half, attempt to create a subvector broadcast.
5896158944
// TODO: Drop hasOneUse checks.
58962-
if (IdxVal == (VecNumElts / 2) &&
58945+
if (IdxVal == (OpVT.getVectorNumElements() / 2) &&
5896358946
Vec.getValueSizeInBits() == (2 * SubVec.getValueSizeInBits()) &&
5896458947
(Vec.hasOneUse() || SubVec.hasOneUse())) {
5896558948
auto *VecLd = dyn_cast<LoadSDNode>(Vec);

llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast.ll

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2837,7 +2837,8 @@ define void @vec384_i8_widen_to_i48_factor6_broadcast_to_v8i48_factor8(ptr %in.v
28372837
; AVX512F-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
28382838
; AVX512F-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,15,6,7,8,9,10,15,12,13,14]
28392839
; AVX512F-NEXT: vpbroadcastb %xmm0, %ymm0
2840-
; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
2840+
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
2841+
; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
28412842
; AVX512F-NEXT: vpaddb (%rdx), %ymm1, %ymm1
28422843
; AVX512F-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
28432844
; AVX512F-NEXT: vmovdqa %ymm0, 32(%rcx)
@@ -2854,7 +2855,8 @@ define void @vec384_i8_widen_to_i48_factor6_broadcast_to_v8i48_factor8(ptr %in.v
28542855
; AVX512DQ-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
28552856
; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,15,6,7,8,9,10,15,12,13,14]
28562857
; AVX512DQ-NEXT: vpbroadcastb %xmm0, %ymm0
2857-
; AVX512DQ-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
2858+
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
2859+
; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
28582860
; AVX512DQ-NEXT: vpaddb (%rdx), %ymm1, %ymm1
28592861
; AVX512DQ-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
28602862
; AVX512DQ-NEXT: vmovdqa %ymm0, 32(%rcx)
@@ -2870,7 +2872,7 @@ define void @vec384_i8_widen_to_i48_factor6_broadcast_to_v8i48_factor8(ptr %in.v
28702872
; AVX512BW-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
28712873
; AVX512BW-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,15,6,7,8,9,10,15,12,13,14]
28722874
; AVX512BW-NEXT: vpbroadcastb %xmm0, %ymm0
2873-
; AVX512BW-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
2875+
; AVX512BW-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
28742876
; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
28752877
; AVX512BW-NEXT: vpaddb (%rdx), %zmm0, %zmm0
28762878
; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rcx)
@@ -3098,7 +3100,8 @@ define void @vec384_i8_widen_to_i96_factor12_broadcast_to_v4i96_factor4(ptr %in.
30983100
; AVX512F-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
30993101
; AVX512F-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,5,6,7,8,9,10,15,12,13,14]
31003102
; AVX512F-NEXT: vpbroadcastb %xmm0, %ymm0
3101-
; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
3103+
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
3104+
; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
31023105
; AVX512F-NEXT: vpaddb (%rdx), %ymm1, %ymm1
31033106
; AVX512F-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
31043107
; AVX512F-NEXT: vmovdqa %ymm0, 32(%rcx)
@@ -3115,7 +3118,8 @@ define void @vec384_i8_widen_to_i96_factor12_broadcast_to_v4i96_factor4(ptr %in.
31153118
; AVX512DQ-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
31163119
; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,5,6,7,8,9,10,15,12,13,14]
31173120
; AVX512DQ-NEXT: vpbroadcastb %xmm0, %ymm0
3118-
; AVX512DQ-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
3121+
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
3122+
; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
31193123
; AVX512DQ-NEXT: vpaddb (%rdx), %ymm1, %ymm1
31203124
; AVX512DQ-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
31213125
; AVX512DQ-NEXT: vmovdqa %ymm0, 32(%rcx)
@@ -3131,7 +3135,7 @@ define void @vec384_i8_widen_to_i96_factor12_broadcast_to_v4i96_factor4(ptr %in.
31313135
; AVX512BW-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
31323136
; AVX512BW-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,5,6,7,8,9,10,15,12,13,14]
31333137
; AVX512BW-NEXT: vpbroadcastb %xmm0, %ymm0
3134-
; AVX512BW-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
3138+
; AVX512BW-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
31353139
; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
31363140
; AVX512BW-NEXT: vpaddb (%rdx), %zmm0, %zmm0
31373141
; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rcx)
@@ -3864,11 +3868,12 @@ define void @vec384_i16_widen_to_i96_factor6_broadcast_to_v4i96_factor4(ptr %in.
38643868
; AVX512F: # %bb.0:
38653869
; AVX512F-NEXT: vmovdqa (%rdi), %xmm0
38663870
; AVX512F-NEXT: vmovdqa 48(%rdi), %xmm1
3867-
; AVX512F-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
38683871
; AVX512F-NEXT: vpaddb (%rsi), %xmm0, %xmm0
38693872
; AVX512F-NEXT: vpbroadcastw %xmm0, %ymm0
3873+
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
3874+
; AVX512F-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
38703875
; AVX512F-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5],xmm0[6],xmm1[7]
3871-
; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
3876+
; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
38723877
; AVX512F-NEXT: vpaddb (%rdx), %ymm1, %ymm1
38733878
; AVX512F-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
38743879
; AVX512F-NEXT: vmovdqa %ymm0, 32(%rcx)
@@ -3880,11 +3885,12 @@ define void @vec384_i16_widen_to_i96_factor6_broadcast_to_v4i96_factor4(ptr %in.
38803885
; AVX512DQ: # %bb.0:
38813886
; AVX512DQ-NEXT: vmovdqa (%rdi), %xmm0
38823887
; AVX512DQ-NEXT: vmovdqa 48(%rdi), %xmm1
3883-
; AVX512DQ-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
38843888
; AVX512DQ-NEXT: vpaddb (%rsi), %xmm0, %xmm0
38853889
; AVX512DQ-NEXT: vpbroadcastw %xmm0, %ymm0
3890+
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
3891+
; AVX512DQ-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
38863892
; AVX512DQ-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5],xmm0[6],xmm1[7]
3887-
; AVX512DQ-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
3893+
; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
38883894
; AVX512DQ-NEXT: vpaddb (%rdx), %ymm1, %ymm1
38893895
; AVX512DQ-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
38903896
; AVX512DQ-NEXT: vmovdqa %ymm0, 32(%rcx)

llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2239,7 +2239,7 @@ define void @vec384_i8_widen_to_i48_factor6_broadcast_to_v8i48_factor8(ptr %in.e
22392239
; AVX512F-NEXT: vpalignr {{.*#+}} xmm0 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
22402240
; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,15,6,7,8,9,10,15,12,13,14]
22412241
; AVX512F-NEXT: vpbroadcastb (%rdi), %ymm1
2242-
; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
2242+
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
22432243
; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
22442244
; AVX512F-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
22452245
; AVX512F-NEXT: vmovdqa %ymm1, 32(%rdx)
@@ -2253,7 +2253,7 @@ define void @vec384_i8_widen_to_i48_factor6_broadcast_to_v8i48_factor8(ptr %in.e
22532253
; AVX512DQ-NEXT: vpalignr {{.*#+}} xmm0 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
22542254
; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,15,6,7,8,9,10,15,12,13,14]
22552255
; AVX512DQ-NEXT: vpbroadcastb (%rdi), %ymm1
2256-
; AVX512DQ-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
2256+
; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
22572257
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0
22582258
; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
22592259
; AVX512DQ-NEXT: vmovdqa %ymm1, 32(%rdx)
@@ -2267,7 +2267,7 @@ define void @vec384_i8_widen_to_i48_factor6_broadcast_to_v8i48_factor8(ptr %in.e
22672267
; AVX512BW-NEXT: vpalignr {{.*#+}} xmm0 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
22682268
; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,15,6,7,8,9,10,15,12,13,14]
22692269
; AVX512BW-NEXT: vpbroadcastb (%rdi), %ymm1
2270-
; AVX512BW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
2270+
; AVX512BW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
22712271
; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
22722272
; AVX512BW-NEXT: vpaddb (%rsi), %zmm0, %zmm0
22732273
; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rdx)
@@ -2458,7 +2458,7 @@ define void @vec384_i8_widen_to_i96_factor12_broadcast_to_v4i96_factor4(ptr %in.
24582458
; AVX512F-NEXT: vpalignr {{.*#+}} xmm0 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
24592459
; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,15,12,13,14]
24602460
; AVX512F-NEXT: vpbroadcastb (%rdi), %ymm1
2461-
; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
2461+
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
24622462
; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
24632463
; AVX512F-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
24642464
; AVX512F-NEXT: vmovdqa %ymm1, 32(%rdx)
@@ -2472,7 +2472,7 @@ define void @vec384_i8_widen_to_i96_factor12_broadcast_to_v4i96_factor4(ptr %in.
24722472
; AVX512DQ-NEXT: vpalignr {{.*#+}} xmm0 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
24732473
; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,15,12,13,14]
24742474
; AVX512DQ-NEXT: vpbroadcastb (%rdi), %ymm1
2475-
; AVX512DQ-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
2475+
; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
24762476
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0
24772477
; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
24782478
; AVX512DQ-NEXT: vmovdqa %ymm1, 32(%rdx)
@@ -2486,7 +2486,7 @@ define void @vec384_i8_widen_to_i96_factor12_broadcast_to_v4i96_factor4(ptr %in.
24862486
; AVX512BW-NEXT: vpalignr {{.*#+}} xmm0 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
24872487
; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,15,12,13,14]
24882488
; AVX512BW-NEXT: vpbroadcastb (%rdi), %ymm1
2489-
; AVX512BW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
2489+
; AVX512BW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
24902490
; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
24912491
; AVX512BW-NEXT: vpaddb (%rsi), %zmm0, %zmm0
24922492
; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rdx)
@@ -3095,7 +3095,7 @@ define void @vec384_i16_widen_to_i96_factor6_broadcast_to_v4i96_factor4(ptr %in.
30953095
; AVX512F: # %bb.0:
30963096
; AVX512F-NEXT: vpbroadcastw (%rdi), %ymm0
30973097
; AVX512F-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1,2,3,4,5],xmm0[6],mem[7]
3098-
; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
3098+
; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
30993099
; AVX512F-NEXT: vpaddb (%rsi), %ymm1, %ymm1
31003100
; AVX512F-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
31013101
; AVX512F-NEXT: vmovdqa %ymm0, 32(%rdx)
@@ -3107,7 +3107,7 @@ define void @vec384_i16_widen_to_i96_factor6_broadcast_to_v4i96_factor4(ptr %in.
31073107
; AVX512DQ: # %bb.0:
31083108
; AVX512DQ-NEXT: vpbroadcastw (%rdi), %ymm0
31093109
; AVX512DQ-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1,2,3,4,5],xmm0[6],mem[7]
3110-
; AVX512DQ-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
3110+
; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
31113111
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm1, %ymm1
31123112
; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
31133113
; AVX512DQ-NEXT: vmovdqa %ymm0, 32(%rdx)

llvm/test/CodeGen/X86/vector-interleaved-store-i32-stride-5.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -350,7 +350,7 @@ define void @store_i32_stride5_vf4(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
350350
; AVX-NEXT: vshufpd {{.*#+}} ymm4 = ymm4[0,0,3,3]
351351
; AVX-NEXT: vblendps {{.*#+}} ymm4 = ymm4[0,1],ymm5[2,3],ymm4[4,5,6],ymm5[7]
352352
; AVX-NEXT: vbroadcastf128 {{.*#+}} ymm5 = mem[0,1,0,1]
353-
; AVX-NEXT: vblendps {{.*#+}} ymm7 = ymm0[0,1,2,3],ymm5[4,5,6,7]
353+
; AVX-NEXT: vinsertf128 $1, %xmm5, %ymm0, %ymm7
354354
; AVX-NEXT: vblendps {{.*#+}} ymm4 = ymm7[0],ymm4[1,2,3],ymm7[4],ymm4[5,6,7]
355355
; AVX-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[u,u,u,2,u,u,u,7]
356356
; AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2],ymm1[3,4,5,6,7]

0 commit comments

Comments
 (0)