Skip to content

Commit 500e39d

Browse files
authored
[X86] combineTargetShuffle - attempt to fold VPERM2X128(ONEUSE(LOAD),UNDEF) -> VBROADCAST128 (llvm#142366)
This matches the fold already performed in lowerV2X128Shuffle, which often fails its one-use test because other uses of the load may not yet have been split at that point.
1 parent cda5ca8 commit 500e39d

File tree

3 files changed

+26
-7
lines changed

3 files changed

+26
-7
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 21 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -42618,9 +42618,11 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
4261842618
return SDValue();
4261942619
}
4262042620
case X86ISD::VPERM2X128: {
42621-
// Fold vperm2x128(bitcast(x),bitcast(y),c) -> bitcast(vperm2x128(x,y,c)).
4262242621
SDValue LHS = N->getOperand(0);
4262342622
SDValue RHS = N->getOperand(1);
42623+
unsigned Imm = N.getConstantOperandVal(2);
42624+
42625+
// Fold vperm2x128(bitcast(x),bitcast(y),c) -> bitcast(vperm2x128(x,y,c)).
4262442626
if (LHS.getOpcode() == ISD::BITCAST &&
4262542627
(RHS.getOpcode() == ISD::BITCAST || RHS.isUndef())) {
4262642628
EVT SrcVT = LHS.getOperand(0).getValueType();
@@ -42653,7 +42655,6 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
4265342655
}
4265442656
return SDValue();
4265542657
};
42656-
unsigned Imm = N.getConstantOperandVal(2);
4265742658
if (SDValue SubLo = FindSubVector128(Imm & 0x0F)) {
4265842659
if (SDValue SubHi = FindSubVector128((Imm & 0xF0) >> 4)) {
4265942660
MVT SubVT = VT.getHalfNumVectorElementsVT();
@@ -42662,6 +42663,24 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
4266242663
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, SubLo, SubHi);
4266342664
}
4266442665
}
42666+
42667+
// Attempt to match VBROADCAST*128 subvector broadcast load.
42668+
if (RHS.isUndef()) {
42669+
SmallVector<int, 4> Mask;
42670+
DecodeVPERM2X128Mask(4, Imm, Mask);
42671+
if (isUndefOrInRange(Mask, 0, 4)) {
42672+
bool SplatLo = isShuffleEquivalent(Mask, {0, 1, 0, 1}, LHS);
42673+
bool SplatHi = isShuffleEquivalent(Mask, {2, 3, 2, 3}, LHS);
42674+
if ((SplatLo || SplatHi) && !Subtarget.hasAVX512() &&
42675+
X86::mayFoldLoad(LHS, Subtarget)) {
42676+
MVT MemVT = VT.getHalfNumVectorElementsVT();
42677+
unsigned Ofs = SplatLo ? 0 : MemVT.getStoreSize();
42678+
return getBROADCAST_LOAD(X86ISD::SUBV_BROADCAST_LOAD, DL, VT, MemVT,
42679+
cast<LoadSDNode>(LHS), Ofs, DAG);
42680+
}
42681+
}
42682+
}
42683+
4266542684
return SDValue();
4266642685
}
4266742686
case X86ISD::PSHUFD:

llvm/test/CodeGen/X86/vector-interleaved-store-i32-stride-7.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -9613,7 +9613,7 @@ define void @store_i32_stride7_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve
96139613
; AVX-NEXT: vshufps {{.*#+}} ymm1 = ymm5[0,1],ymm1[2,0],ymm5[4,5],ymm1[6,4]
96149614
; AVX-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
96159615
; AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm4[6,7]
9616-
; AVX-NEXT: vperm2f128 {{.*#+}} ymm1 = mem[2,3,2,3]
9616+
; AVX-NEXT: vbroadcastf128 {{.*#+}} ymm1 = mem[0,1,0,1]
96179617
; AVX-NEXT: vshufps {{.*#+}} ymm1 = ymm1[1,1,2,2,5,5,6,6]
96189618
; AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6],ymm1[7]
96199619
; AVX-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm3[2,3],ymm0[2,3]

llvm/test/CodeGen/X86/x86-interleaved-access.ll

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1632,12 +1632,12 @@ ret void
16321632
define void @splat2_v4f64_load_store(ptr %s, ptr %d) nounwind {
16331633
; AVX1-LABEL: splat2_v4f64_load_store:
16341634
; AVX1: # %bb.0:
1635-
; AVX1-NEXT: vperm2f128 $51, (%rdi), %ymm0, %ymm0 # ymm0 = mem[2,3,2,3]
1635+
; AVX1-NEXT: vbroadcastf128 (%rdi), %ymm0 # ymm0 = mem[0,1,0,1]
16361636
; AVX1-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[0,0,3,3]
1637-
; AVX1-NEXT: vbroadcastf128 (%rdi), %ymm1 # ymm1 = mem[0,1,0,1]
1637+
; AVX1-NEXT: vbroadcastf128 16(%rdi), %ymm1 # ymm1 = mem[0,1,0,1]
16381638
; AVX1-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[0,0,3,3]
1639-
; AVX1-NEXT: vmovupd %ymm0, 32(%rsi)
1640-
; AVX1-NEXT: vmovupd %ymm1, (%rsi)
1639+
; AVX1-NEXT: vmovupd %ymm1, 32(%rsi)
1640+
; AVX1-NEXT: vmovupd %ymm0, (%rsi)
16411641
; AVX1-NEXT: vzeroupper
16421642
; AVX1-NEXT: retq
16431643
;

0 commit comments

Comments
 (0)