Skip to content

Commit d72a7d4

Browse files
committed
[X86] combineINSERT_SUBVECTOR - generalise insert_subvector(x,extract(broadcast)) -> blend
Don't match against specific broadcast nodes; instead, build the shuffle mask for the extract+insert pair and let isShuffleEquivalent decide whether it is equivalent to a blend.
1 parent 5ddcd76 commit d72a7d4

File tree

6 files changed

+26
-29
lines changed

6 files changed

+26
-29
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 19 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -59222,36 +59222,33 @@ static SDValue combineINSERT_SUBVECTOR(SDNode *N, SelectionDAG &DAG,
5922259222
!(Vec.isUndef() || ISD::isBuildVectorAllZeros(Vec.getNode())))) {
5922359223
SDValue ExtSrc = SubVec.getOperand(0);
5922459224
int ExtIdxVal = SubVec.getConstantOperandVal(1);
59225-
if (ExtIdxVal != 0) {
59226-
SmallVector<int, 64> Mask(VecNumElts);
59227-
// First create an identity shuffle mask.
59228-
for (int i = 0; i != VecNumElts; ++i)
59229-
Mask[i] = i;
59230-
// Now insert the extracted portion.
59231-
for (int i = 0; i != SubVecNumElts; ++i)
59232-
Mask[i + IdxVal] = i + ExtIdxVal + VecNumElts;
59225+
// Create a shuffle mask matching the extraction and insertion.
59226+
SmallVector<int, 64> Mask(VecNumElts);
59227+
std::iota(Mask.begin(), Mask.end(), 0);
59228+
std::iota(Mask.begin() + IdxVal, Mask.begin() + IdxVal + SubVecNumElts,
59229+
ExtIdxVal + VecNumElts);
59230+
if (ExtIdxVal != 0)
5923359231
return DAG.getVectorShuffle(OpVT, dl, Vec, ExtSrc, Mask);
59234-
}
59235-
// If we're broadcasting, see if we can use a blend instead of
59236-
// extract/insert pair. Ensure that the subvector is aligned with the
59237-
// insertion/extractions.
59238-
if ((ExtIdxVal % SubVecNumElts) == 0 && (IdxVal % SubVecNumElts) == 0 &&
59239-
(ExtSrc.getOpcode() == X86ISD::VBROADCAST ||
59240-
ExtSrc.getOpcode() == X86ISD::VBROADCAST_LOAD ||
59241-
(ExtSrc.getOpcode() == X86ISD::SUBV_BROADCAST_LOAD &&
59242-
cast<MemIntrinsicSDNode>(ExtSrc)->getMemoryVT() == SubVecVT))) {
59232+
// See if we can use a blend instead of extract/insert pair.
59233+
SmallVector<int, 64> BlendMask(VecNumElts);
59234+
std::iota(BlendMask.begin(), BlendMask.end(), 0);
59235+
std::iota(BlendMask.begin() + IdxVal,
59236+
BlendMask.begin() + IdxVal + SubVecNumElts, VecNumElts + IdxVal);
59237+
if (isShuffleEquivalent(Mask, BlendMask, Vec, ExtSrc)) {
59238+
assert((IdxVal == 0 || IdxVal == SubVecNumElts) &&
59239+
"Unaligned subvector insertion");
5924359240
if (OpVT.is256BitVector() && SubVecVT.is128BitVector()) {
59244-
uint64_t BlendMask = IdxVal == 0 ? 0x0F : 0xF0;
5924559241
SDValue Blend = DAG.getNode(
5924659242
X86ISD::BLENDI, dl, MVT::v8f32, DAG.getBitcast(MVT::v8f32, Vec),
5924759243
DAG.getBitcast(MVT::v8f32, ExtSrc),
59248-
DAG.getTargetConstant(BlendMask, dl, MVT::i8));
59244+
DAG.getTargetConstant(IdxVal == 0 ? 0x0F : 0xF0, dl, MVT::i8));
5924959245
return DAG.getBitcast(OpVT, Blend);
5925059246
} else if (OpVT.is512BitVector() && SubVecVT.is256BitVector()) {
59251-
SDValue Lo = DAG.getBitcast(MVT::v8f64, IdxVal == 0 ? ExtSrc : Vec);
59252-
SDValue Hi = DAG.getBitcast(MVT::v8f64, IdxVal == 0 ? Vec : ExtSrc);
59247+
MVT ShufVT = OpVT.isInteger() ? MVT::v8i64 : MVT::v8f64;
59248+
SDValue Lo = DAG.getBitcast(ShufVT, IdxVal == 0 ? ExtSrc : Vec);
59249+
SDValue Hi = DAG.getBitcast(ShufVT, IdxVal == 0 ? Vec : ExtSrc);
5925359250
SDValue Shuffle =
59254-
DAG.getNode(X86ISD::SHUF128, dl, MVT::v8f64, Lo, Hi,
59251+
DAG.getNode(X86ISD::SHUF128, dl, ShufVT, Lo, Hi,
5925559252
getV4X86ShuffleImm8ForMask({0, 1, 2, 3}, dl, DAG));
5925659253
return DAG.getBitcast(OpVT, Shuffle);
5925759254
}

llvm/test/CodeGen/X86/insert-subvector-broadcast.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,9 @@ define void @insert_subvector_broadcast_as_blend() {
77
; CHECK-NEXT: movq (%rax), %rax
88
; CHECK-NEXT: incq %rax
99
; CHECK-NEXT: vpbroadcastq %rax, %zmm0
10-
; CHECK-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
11-
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
12-
; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm1 = zmm1[0,1,2,3],zmm0[4,5,6,7]
10+
; CHECK-NEXT: vpmovsxbq {{.*#+}} zmm1 = [8,0,0,1,4,5,6,7]
11+
; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
12+
; CHECK-NEXT: vpermi2q %zmm2, %zmm0, %zmm1
1313
; CHECK-NEXT: vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %k0
1414
; CHECK-NEXT: vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k1
1515
; CHECK-NEXT: kunpckbw %k0, %k1, %k1

llvm/test/CodeGen/X86/widen_fadd.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -221,7 +221,7 @@ define void @widen_fadd_v2f32_v16f32(ptr %a0, ptr %b0, ptr %c0) {
221221
; AVX512F-NEXT: vinsertf32x4 $1, %xmm3, %zmm2, %zmm2
222222
; AVX512F-NEXT: vinsertf32x4 $1, %xmm1, %zmm0, %zmm0
223223
; AVX512F-NEXT: vpermt2pd %zmm2, %zmm5, %zmm0
224-
; AVX512F-NEXT: vinsertf64x4 $0, %ymm0, %zmm4, %zmm0
224+
; AVX512F-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm4[4,5,6,7]
225225
; AVX512F-NEXT: vmovupd %zmm0, (%rdx)
226226
; AVX512F-NEXT: vzeroupper
227227
; AVX512F-NEXT: retq

llvm/test/CodeGen/X86/widen_fdiv.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,7 @@ define void @widen_fdiv_v2f32_v16f32(ptr %a0, ptr %b0, ptr %c0) {
182182
; AVX512F-NEXT: vinsertf32x4 $1, %xmm3, %zmm2, %zmm2
183183
; AVX512F-NEXT: vinsertf32x4 $1, %xmm1, %zmm0, %zmm0
184184
; AVX512F-NEXT: vpermt2pd %zmm2, %zmm5, %zmm0
185-
; AVX512F-NEXT: vinsertf64x4 $0, %ymm0, %zmm4, %zmm0
185+
; AVX512F-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm4[4,5,6,7]
186186
; AVX512F-NEXT: vmovupd %zmm0, (%rdx)
187187
; AVX512F-NEXT: vzeroupper
188188
; AVX512F-NEXT: retq

llvm/test/CodeGen/X86/widen_fmul.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -221,7 +221,7 @@ define void @widen_fmul_v2f32_v16f32(ptr %a0, ptr %b0, ptr %c0) {
221221
; AVX512F-NEXT: vinsertf32x4 $1, %xmm3, %zmm2, %zmm2
222222
; AVX512F-NEXT: vinsertf32x4 $1, %xmm1, %zmm0, %zmm0
223223
; AVX512F-NEXT: vpermt2pd %zmm2, %zmm5, %zmm0
224-
; AVX512F-NEXT: vinsertf64x4 $0, %ymm0, %zmm4, %zmm0
224+
; AVX512F-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm4[4,5,6,7]
225225
; AVX512F-NEXT: vmovupd %zmm0, (%rdx)
226226
; AVX512F-NEXT: vzeroupper
227227
; AVX512F-NEXT: retq

llvm/test/CodeGen/X86/widen_fsub.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -221,7 +221,7 @@ define void @widen_fsub_v2f32_v16f32(ptr %a0, ptr %b0, ptr %c0) {
221221
; AVX512F-NEXT: vinsertf32x4 $1, %xmm3, %zmm2, %zmm2
222222
; AVX512F-NEXT: vinsertf32x4 $1, %xmm1, %zmm0, %zmm0
223223
; AVX512F-NEXT: vpermt2pd %zmm2, %zmm5, %zmm0
224-
; AVX512F-NEXT: vinsertf64x4 $0, %ymm0, %zmm4, %zmm0
224+
; AVX512F-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm4[4,5,6,7]
225225
; AVX512F-NEXT: vmovupd %zmm0, (%rdx)
226226
; AVX512F-NEXT: vzeroupper
227227
; AVX512F-NEXT: retq

0 commit comments

Comments
 (0)