Skip to content

Commit c5afcfe

Browse files
authored
[X86] combineINSERT_SUBVECTOR - fold insert_subvector(base,extract_subvector(broadcast)) -> blend shuffle(base,broadcast) (REAPPLIED) (#133724)
If the broadcast is already the full vector width, prefer a blend/vshuff64x2 over a vector insertion: the blend usually has lower latency (and sometimes a lower uop count), and it reduces changes in vector sizes that can interfere with further combines. This is an updated version of #133083, which led to infinite loops because shuffle lowering recreated the INSERT_SUBVECTOR pattern; this variant creates the BLENDI/SHUF128 nodes directly.
1 parent 5ff8c03 commit c5afcfe

File tree

5 files changed

+46
-22
lines changed

5 files changed

+46
-22
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58896,6 +58896,30 @@ static SDValue combineINSERT_SUBVECTOR(SDNode *N, SelectionDAG &DAG,
5889658896
Mask[i + IdxVal] = i + ExtIdxVal + VecNumElts;
5889758897
return DAG.getVectorShuffle(OpVT, dl, Vec, ExtSrc, Mask);
5889858898
}
58899+
// If we're broadcasting, see if we can use a blend instead of
58900+
// an extract/insert pair. Ensure that the subvector is aligned with the
58901+
// insertion/extractions.
58902+
if ((ExtIdxVal % SubVecNumElts) == 0 && (IdxVal % SubVecNumElts) == 0 &&
58903+
(ExtSrc.getOpcode() == X86ISD::VBROADCAST ||
58904+
ExtSrc.getOpcode() == X86ISD::VBROADCAST_LOAD ||
58905+
(ExtSrc.getOpcode() == X86ISD::SUBV_BROADCAST_LOAD &&
58906+
cast<MemIntrinsicSDNode>(ExtSrc)->getMemoryVT() == SubVecVT))) {
58907+
if (OpVT.is256BitVector() && SubVecVT.is128BitVector()) {
58908+
uint64_t BlendMask = IdxVal == 0 ? 0x0F : 0xF0;
58909+
SDValue Blend = DAG.getNode(
58910+
X86ISD::BLENDI, dl, MVT::v8f32, DAG.getBitcast(MVT::v8f32, Vec),
58911+
DAG.getBitcast(MVT::v8f32, ExtSrc),
58912+
DAG.getTargetConstant(BlendMask, dl, MVT::i8));
58913+
return DAG.getBitcast(OpVT, Blend);
58914+
} else if (OpVT.is512BitVector() && SubVecVT.is256BitVector()) {
58915+
SDValue Lo = DAG.getBitcast(MVT::v8f64, IdxVal == 0 ? ExtSrc : Vec);
58916+
SDValue Hi = DAG.getBitcast(MVT::v8f64, IdxVal == 0 ? Vec : ExtSrc);
58917+
SDValue Shuffle =
58918+
DAG.getNode(X86ISD::SHUF128, dl, MVT::v8f64, Lo, Hi,
58919+
getV4X86ShuffleImm8ForMask({0, 1, 2, 3}, dl, DAG));
58920+
return DAG.getBitcast(OpVT, Shuffle);
58921+
}
58922+
}
5889958923
}
5890058924

5890158925
// Match concat_vector style patterns.

llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast.ll

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2573,7 +2573,7 @@ define void @vec384_i8_widen_to_i24_factor3_broadcast_to_v16i24_factor16(ptr %in
25732573
; AVX512F-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
25742574
; AVX512F-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,15,3,4,15,6,7,15,9,10,15,12,13,15]
25752575
; AVX512F-NEXT: vpbroadcastb %xmm0, %ymm0
2576-
; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
2576+
; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
25772577
; AVX512F-NEXT: vpaddb (%rdx), %ymm1, %ymm1
25782578
; AVX512F-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
25792579
; AVX512F-NEXT: vmovdqa %ymm0, 32(%rcx)
@@ -2590,7 +2590,7 @@ define void @vec384_i8_widen_to_i24_factor3_broadcast_to_v16i24_factor16(ptr %in
25902590
; AVX512DQ-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
25912591
; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,15,3,4,15,6,7,15,9,10,15,12,13,15]
25922592
; AVX512DQ-NEXT: vpbroadcastb %xmm0, %ymm0
2593-
; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
2593+
; AVX512DQ-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
25942594
; AVX512DQ-NEXT: vpaddb (%rdx), %ymm1, %ymm1
25952595
; AVX512DQ-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
25962596
; AVX512DQ-NEXT: vmovdqa %ymm0, 32(%rcx)
@@ -2835,7 +2835,7 @@ define void @vec384_i8_widen_to_i48_factor6_broadcast_to_v8i48_factor8(ptr %in.v
28352835
; AVX512F-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
28362836
; AVX512F-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,15,6,7,8,9,10,15,12,13,14]
28372837
; AVX512F-NEXT: vpbroadcastb %xmm0, %ymm0
2838-
; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
2838+
; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
28392839
; AVX512F-NEXT: vpaddb (%rdx), %ymm1, %ymm1
28402840
; AVX512F-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
28412841
; AVX512F-NEXT: vmovdqa %ymm0, 32(%rcx)
@@ -2852,7 +2852,7 @@ define void @vec384_i8_widen_to_i48_factor6_broadcast_to_v8i48_factor8(ptr %in.v
28522852
; AVX512DQ-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
28532853
; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,15,6,7,8,9,10,15,12,13,14]
28542854
; AVX512DQ-NEXT: vpbroadcastb %xmm0, %ymm0
2855-
; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
2855+
; AVX512DQ-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
28562856
; AVX512DQ-NEXT: vpaddb (%rdx), %ymm1, %ymm1
28572857
; AVX512DQ-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
28582858
; AVX512DQ-NEXT: vmovdqa %ymm0, 32(%rcx)
@@ -2868,7 +2868,7 @@ define void @vec384_i8_widen_to_i48_factor6_broadcast_to_v8i48_factor8(ptr %in.v
28682868
; AVX512BW-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
28692869
; AVX512BW-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,15,6,7,8,9,10,15,12,13,14]
28702870
; AVX512BW-NEXT: vpbroadcastb %xmm0, %ymm0
2871-
; AVX512BW-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
2871+
; AVX512BW-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
28722872
; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
28732873
; AVX512BW-NEXT: vpaddb (%rdx), %zmm0, %zmm0
28742874
; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rcx)
@@ -3096,7 +3096,7 @@ define void @vec384_i8_widen_to_i96_factor12_broadcast_to_v4i96_factor4(ptr %in.
30963096
; AVX512F-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
30973097
; AVX512F-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,5,6,7,8,9,10,15,12,13,14]
30983098
; AVX512F-NEXT: vpbroadcastb %xmm0, %ymm0
3099-
; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
3099+
; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
31003100
; AVX512F-NEXT: vpaddb (%rdx), %ymm1, %ymm1
31013101
; AVX512F-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
31023102
; AVX512F-NEXT: vmovdqa %ymm0, 32(%rcx)
@@ -3113,7 +3113,7 @@ define void @vec384_i8_widen_to_i96_factor12_broadcast_to_v4i96_factor4(ptr %in.
31133113
; AVX512DQ-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
31143114
; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,5,6,7,8,9,10,15,12,13,14]
31153115
; AVX512DQ-NEXT: vpbroadcastb %xmm0, %ymm0
3116-
; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
3116+
; AVX512DQ-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
31173117
; AVX512DQ-NEXT: vpaddb (%rdx), %ymm1, %ymm1
31183118
; AVX512DQ-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
31193119
; AVX512DQ-NEXT: vmovdqa %ymm0, 32(%rcx)
@@ -3129,7 +3129,7 @@ define void @vec384_i8_widen_to_i96_factor12_broadcast_to_v4i96_factor4(ptr %in.
31293129
; AVX512BW-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
31303130
; AVX512BW-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,5,6,7,8,9,10,15,12,13,14]
31313131
; AVX512BW-NEXT: vpbroadcastb %xmm0, %ymm0
3132-
; AVX512BW-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
3132+
; AVX512BW-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
31333133
; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
31343134
; AVX512BW-NEXT: vpaddb (%rdx), %zmm0, %zmm0
31353135
; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rcx)
@@ -3612,7 +3612,7 @@ define void @vec384_i16_widen_to_i48_factor3_broadcast_to_v8i48_factor8(ptr %in.
36123612
; AVX512F-NEXT: vpaddb (%rsi), %xmm0, %xmm0
36133613
; AVX512F-NEXT: vpbroadcastw %xmm0, %ymm0
36143614
; AVX512F-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2],xmm0[3],xmm1[4,5],xmm0[6],xmm1[7]
3615-
; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
3615+
; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
36163616
; AVX512F-NEXT: vpaddb (%rdx), %ymm1, %ymm1
36173617
; AVX512F-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
36183618
; AVX512F-NEXT: vmovdqa %ymm0, 32(%rcx)
@@ -3628,7 +3628,7 @@ define void @vec384_i16_widen_to_i48_factor3_broadcast_to_v8i48_factor8(ptr %in.
36283628
; AVX512DQ-NEXT: vpaddb (%rsi), %xmm0, %xmm0
36293629
; AVX512DQ-NEXT: vpbroadcastw %xmm0, %ymm0
36303630
; AVX512DQ-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2],xmm0[3],xmm1[4,5],xmm0[6],xmm1[7]
3631-
; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
3631+
; AVX512DQ-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
36323632
; AVX512DQ-NEXT: vpaddb (%rdx), %ymm1, %ymm1
36333633
; AVX512DQ-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
36343634
; AVX512DQ-NEXT: vmovdqa %ymm0, 32(%rcx)
@@ -3864,7 +3864,7 @@ define void @vec384_i16_widen_to_i96_factor6_broadcast_to_v4i96_factor4(ptr %in.
38643864
; AVX512F-NEXT: vpaddb (%rsi), %xmm0, %xmm0
38653865
; AVX512F-NEXT: vpbroadcastw %xmm0, %ymm0
38663866
; AVX512F-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5],xmm0[6],xmm1[7]
3867-
; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
3867+
; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
38683868
; AVX512F-NEXT: vpaddb (%rdx), %ymm1, %ymm1
38693869
; AVX512F-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
38703870
; AVX512F-NEXT: vmovdqa %ymm0, 32(%rcx)
@@ -3880,7 +3880,7 @@ define void @vec384_i16_widen_to_i96_factor6_broadcast_to_v4i96_factor4(ptr %in.
38803880
; AVX512DQ-NEXT: vpaddb (%rsi), %xmm0, %xmm0
38813881
; AVX512DQ-NEXT: vpbroadcastw %xmm0, %ymm0
38823882
; AVX512DQ-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5],xmm0[6],xmm1[7]
3883-
; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
3883+
; AVX512DQ-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
38843884
; AVX512DQ-NEXT: vpaddb (%rdx), %ymm1, %ymm1
38853885
; AVX512DQ-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
38863886
; AVX512DQ-NEXT: vmovdqa %ymm0, 32(%rcx)

llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2239,7 +2239,7 @@ define void @vec384_i8_widen_to_i48_factor6_broadcast_to_v8i48_factor8(ptr %in.e
22392239
; AVX512F-NEXT: vpalignr {{.*#+}} xmm0 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
22402240
; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,15,6,7,8,9,10,15,12,13,14]
22412241
; AVX512F-NEXT: vpbroadcastb (%rdi), %ymm1
2242-
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2242+
; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
22432243
; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
22442244
; AVX512F-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
22452245
; AVX512F-NEXT: vmovdqa %ymm1, 32(%rdx)
@@ -2253,7 +2253,7 @@ define void @vec384_i8_widen_to_i48_factor6_broadcast_to_v8i48_factor8(ptr %in.e
22532253
; AVX512DQ-NEXT: vpalignr {{.*#+}} xmm0 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
22542254
; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,15,6,7,8,9,10,15,12,13,14]
22552255
; AVX512DQ-NEXT: vpbroadcastb (%rdi), %ymm1
2256-
; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2256+
; AVX512DQ-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
22572257
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0
22582258
; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
22592259
; AVX512DQ-NEXT: vmovdqa %ymm1, 32(%rdx)
@@ -2267,7 +2267,7 @@ define void @vec384_i8_widen_to_i48_factor6_broadcast_to_v8i48_factor8(ptr %in.e
22672267
; AVX512BW-NEXT: vpalignr {{.*#+}} xmm0 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
22682268
; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,15,6,7,8,9,10,15,12,13,14]
22692269
; AVX512BW-NEXT: vpbroadcastb (%rdi), %ymm1
2270-
; AVX512BW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2270+
; AVX512BW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
22712271
; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
22722272
; AVX512BW-NEXT: vpaddb (%rsi), %zmm0, %zmm0
22732273
; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rdx)
@@ -2458,7 +2458,7 @@ define void @vec384_i8_widen_to_i96_factor12_broadcast_to_v4i96_factor4(ptr %in.
24582458
; AVX512F-NEXT: vpalignr {{.*#+}} xmm0 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
24592459
; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,15,12,13,14]
24602460
; AVX512F-NEXT: vpbroadcastb (%rdi), %ymm1
2461-
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2461+
; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
24622462
; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
24632463
; AVX512F-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
24642464
; AVX512F-NEXT: vmovdqa %ymm1, 32(%rdx)
@@ -2472,7 +2472,7 @@ define void @vec384_i8_widen_to_i96_factor12_broadcast_to_v4i96_factor4(ptr %in.
24722472
; AVX512DQ-NEXT: vpalignr {{.*#+}} xmm0 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
24732473
; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,15,12,13,14]
24742474
; AVX512DQ-NEXT: vpbroadcastb (%rdi), %ymm1
2475-
; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2475+
; AVX512DQ-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
24762476
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0
24772477
; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
24782478
; AVX512DQ-NEXT: vmovdqa %ymm1, 32(%rdx)
@@ -2486,7 +2486,7 @@ define void @vec384_i8_widen_to_i96_factor12_broadcast_to_v4i96_factor4(ptr %in.
24862486
; AVX512BW-NEXT: vpalignr {{.*#+}} xmm0 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
24872487
; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,15,12,13,14]
24882488
; AVX512BW-NEXT: vpbroadcastb (%rdi), %ymm1
2489-
; AVX512BW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2489+
; AVX512BW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
24902490
; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
24912491
; AVX512BW-NEXT: vpaddb (%rsi), %zmm0, %zmm0
24922492
; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rdx)
@@ -3095,7 +3095,7 @@ define void @vec384_i16_widen_to_i96_factor6_broadcast_to_v4i96_factor4(ptr %in.
30953095
; AVX512F: # %bb.0:
30963096
; AVX512F-NEXT: vpbroadcastw (%rdi), %ymm0
30973097
; AVX512F-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1,2,3,4,5],xmm0[6],mem[7]
3098-
; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
3098+
; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
30993099
; AVX512F-NEXT: vpaddb (%rsi), %ymm1, %ymm1
31003100
; AVX512F-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
31013101
; AVX512F-NEXT: vmovdqa %ymm0, 32(%rdx)
@@ -3107,7 +3107,7 @@ define void @vec384_i16_widen_to_i96_factor6_broadcast_to_v4i96_factor4(ptr %in.
31073107
; AVX512DQ: # %bb.0:
31083108
; AVX512DQ-NEXT: vpbroadcastw (%rdi), %ymm0
31093109
; AVX512DQ-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1,2,3,4,5],xmm0[6],mem[7]
3110-
; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
3110+
; AVX512DQ-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
31113111
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm1, %ymm1
31123112
; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
31133113
; AVX512DQ-NEXT: vmovdqa %ymm0, 32(%rdx)

llvm/test/CodeGen/X86/insert-subvector-broadcast.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ define void @insert_subvector_broadcast_as_blend() {
99
; CHECK-NEXT: vpbroadcastq %rax, %zmm0
1010
; CHECK-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
1111
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
12-
; CHECK-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm1
12+
; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm1 = zmm1[0,1,2,3],zmm0[4,5,6,7]
1313
; CHECK-NEXT: vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %k0
1414
; CHECK-NEXT: vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k1
1515
; CHECK-NEXT: kunpckbw %k0, %k1, %k1

llvm/test/CodeGen/X86/vector-interleaved-store-i32-stride-5.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -350,7 +350,7 @@ define void @store_i32_stride5_vf4(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
350350
; AVX-NEXT: vshufpd {{.*#+}} ymm4 = ymm4[0,0,3,3]
351351
; AVX-NEXT: vblendps {{.*#+}} ymm4 = ymm4[0,1],ymm5[2,3],ymm4[4,5,6],ymm5[7]
352352
; AVX-NEXT: vbroadcastf128 {{.*#+}} ymm5 = mem[0,1,0,1]
353-
; AVX-NEXT: vinsertf128 $1, %xmm5, %ymm0, %ymm7
353+
; AVX-NEXT: vblendps {{.*#+}} ymm7 = ymm0[0,1,2,3],ymm5[4,5,6,7]
354354
; AVX-NEXT: vblendps {{.*#+}} ymm4 = ymm7[0],ymm4[1,2,3],ymm7[4],ymm4[5,6,7]
355355
; AVX-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[u,u,u,2,u,u,u,7]
356356
; AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2],ymm1[3,4,5,6,7]

0 commit comments

Comments
 (0)