Skip to content

Commit a6a258f

Browse files
committed
[X86][AVX] Fold concat(extract_subvector(v0,c0), extract_subvector(v1,c1)) -> vperm2x128
Fixes regression exposed by removing bitcasts across logic-ops in D96206. Differential Revision: https://reviews.llvm.org/D96206
1 parent 2885d12 commit a6a258f

File tree

2 files changed

+25
-4
lines changed

2 files changed

+25
-4
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49249,6 +49249,29 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
4924949249
}
4925049250
}
4925149251

49252+
// concat(extract_subvector(v0,c0), extract_subvector(v1,c1)) -> vperm2x128.
49253+
// Only handle concatenation of the subvectors' high halves, which is the case vperm2x128 is best at.
49254+
// TODO: This should go in combineX86ShufflesRecursively eventually.
49255+
if (VT.is256BitVector() && Ops.size() == 2) {
49256+
SDValue Src0 = peekThroughBitcasts(Ops[0]);
49257+
SDValue Src1 = peekThroughBitcasts(Ops[1]);
49258+
if (Src0.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
49259+
Src1.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
49260+
EVT SrcVT0 = Src0.getOperand(0).getValueType();
49261+
EVT SrcVT1 = Src1.getOperand(0).getValueType();
49262+
unsigned NumSrcElts0 = SrcVT0.getVectorNumElements();
49263+
unsigned NumSrcElts1 = SrcVT1.getVectorNumElements();
49264+
if (SrcVT0.is256BitVector() && SrcVT1.is256BitVector() &&
49265+
Src0.getConstantOperandAPInt(1) == (NumSrcElts0 / 2) &&
49266+
Src1.getConstantOperandAPInt(1) == (NumSrcElts1 / 2)) {
49267+
return DAG.getNode(X86ISD::VPERM2X128, DL, VT,
49268+
DAG.getBitcast(VT, Src0.getOperand(0)),
49269+
DAG.getBitcast(VT, Src1.getOperand(0)),
49270+
DAG.getTargetConstant(0x31, DL, MVT::i8));
49271+
}
49272+
}
49273+
}
49274+
4925249275
// Repeated opcode.
4925349276
// TODO - combineX86ShufflesRecursively should handle shuffle concatenation
4925449277
// but it currently struggles with different vector widths.

llvm/test/CodeGen/X86/pr40891.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,9 @@ define <8 x i32> @foo(<8 x i64> %x, <4 x i64> %y) {
88
; CHECK: # %bb.0:
99
; CHECK-NEXT: vandps %ymm2, %ymm0, %ymm0
1010
; CHECK-NEXT: vandps {{\.LCPI.*}}, %ymm1, %ymm1
11-
; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm2
12-
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
13-
; CHECK-NEXT: vextractf128 $1, %ymm1, %xmm1
11+
; CHECK-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
1412
; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
15-
; CHECK-NEXT: vshufps {{.*#+}} ymm0 = ymm2[0,2],ymm0[0,2],ymm2[4,6],ymm0[4,6]
13+
; CHECK-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
1614
; CHECK-NEXT: retl
1715
%a = shufflevector <4 x i64> %y, <4 x i64> <i64 12345, i64 67890, i64 13579, i64 24680>, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1816
%b = and <8 x i64> %x, %a

0 commit comments

Comments
 (0)