Skip to content

Commit b971920

Browse files
committed
[AArch64] Try to fold shuffle (tbl2, tbl2) to tbl4.
shuffle (tbl2, tbl2) can be folded into a single tbl4 if the tbl2 mask elements selected from the second tbl2 are constant (they must be rebased by 32 to address the third and fourth table registers). Reviewed By: t.p.northover Differential Revision: https://reviews.llvm.org/D133491 (cherry-picked from ac434af)
1 parent 4fa9ce5 commit b971920

File tree

3 files changed

+329
-159
lines changed

3 files changed

+329
-159
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10412,6 +10412,51 @@ static SDValue tryWidenMaskForShuffle(SDValue Op, SelectionDAG &DAG) {
1041210412
return SDValue();
1041310413
}
1041410414

10415+
// Try to fold shuffle (tbl2, tbl2) into a single tbl4.
10416+
static SDValue tryToConvertShuffleOfTbl2ToTbl4(SDValue Op,
10417+
ArrayRef<int> ShuffleMask,
10418+
SelectionDAG &DAG) {
10419+
SDValue Tbl1 = Op->getOperand(0);
10420+
SDValue Tbl2 = Op->getOperand(1);
10421+
SDLoc dl(Op);
10422+
SDValue Tbl2ID =
10423+
DAG.getTargetConstant(Intrinsic::aarch64_neon_tbl2, dl, MVT::i64);
10424+
10425+
EVT VT = Op.getValueType();
10426+
if (Tbl1->getOpcode() != ISD::INTRINSIC_WO_CHAIN ||
10427+
Tbl1->getOperand(0) != Tbl2ID ||
10428+
Tbl2->getOpcode() != ISD::INTRINSIC_WO_CHAIN ||
10429+
Tbl2->getOperand(0) != Tbl2ID)
10430+
return SDValue();
10431+
10432+
if (Tbl1->getValueType(0) != MVT::v16i8 ||
10433+
Tbl2->getValueType(0) != MVT::v16i8)
10434+
return SDValue();
10435+
10436+
SDValue Mask1 = Tbl1->getOperand(3);
10437+
SDValue Mask2 = Tbl2->getOperand(3);
10438+
SmallVector<SDValue, 16> TBLMaskParts(16, SDValue());
10439+
for (unsigned I = 0; I < 16; I++) {
10440+
if (ShuffleMask[I] < 16)
10441+
TBLMaskParts[I] = Mask1->getOperand(ShuffleMask[I]);
10442+
else {
10443+
auto *C =
10444+
dyn_cast<ConstantSDNode>(Mask2->getOperand(ShuffleMask[I] - 16));
10445+
if (!C)
10446+
return SDValue();
10447+
TBLMaskParts[I] = DAG.getConstant(C->getSExtValue() + 32, dl, MVT::i32);
10448+
}
10449+
}
10450+
10451+
SDValue TBLMask = DAG.getBuildVector(VT, dl, TBLMaskParts);
10452+
SDValue ID =
10453+
DAG.getTargetConstant(Intrinsic::aarch64_neon_tbl4, dl, MVT::i64);
10454+
10455+
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v16i8,
10456+
{ID, Tbl1->getOperand(1), Tbl1->getOperand(2),
10457+
Tbl2->getOperand(1), Tbl2->getOperand(2), TBLMask});
10458+
}
10459+
1041510460
SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
1041610461
SelectionDAG &DAG) const {
1041710462
SDLoc dl(Op);
@@ -10435,6 +10480,9 @@ SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
1043510480
assert(ShuffleMask.size() == VT.getVectorNumElements() &&
1043610481
"Unexpected VECTOR_SHUFFLE mask size!");
1043710482

10483+
if (SDValue Res = tryToConvertShuffleOfTbl2ToTbl4(Op, ShuffleMask, DAG))
10484+
return Res;
10485+
1043810486
if (SVN->isSplat()) {
1043910487
int Lane = SVN->getSplatIndex();
1044010488
// If this is undef splat, generate it via "just" vdup, if possible.

0 commit comments

Comments
 (0)