@@ -10795,6 +10795,51 @@ static SDValue tryWidenMaskForShuffle(SDValue Op, SelectionDAG &DAG) {
10795
10795
return SDValue();
10796
10796
}
10797
10797
10798
+ // Try to fold shuffle (tbl2, tbl2) into a single tbl4.
10799
+ static SDValue tryToConvertShuffleOfTbl2ToTbl4(SDValue Op,
10800
+ ArrayRef<int> ShuffleMask,
10801
+ SelectionDAG &DAG) {
10802
+ SDValue Tbl1 = Op->getOperand(0);
10803
+ SDValue Tbl2 = Op->getOperand(1);
10804
+ SDLoc dl(Op);
10805
+ SDValue Tbl2ID =
10806
+ DAG.getTargetConstant(Intrinsic::aarch64_neon_tbl2, dl, MVT::i64);
10807
+
10808
+ EVT VT = Op.getValueType();
10809
+ if (Tbl1->getOpcode() != ISD::INTRINSIC_WO_CHAIN ||
10810
+ Tbl1->getOperand(0) != Tbl2ID ||
10811
+ Tbl2->getOpcode() != ISD::INTRINSIC_WO_CHAIN ||
10812
+ Tbl2->getOperand(0) != Tbl2ID)
10813
+ return SDValue();
10814
+
10815
+ if (Tbl1->getValueType(0) != MVT::v16i8 ||
10816
+ Tbl2->getValueType(0) != MVT::v16i8)
10817
+ return SDValue();
10818
+
10819
+ SDValue Mask1 = Tbl1->getOperand(3);
10820
+ SDValue Mask2 = Tbl2->getOperand(3);
10821
+ SmallVector<SDValue, 16> TBLMaskParts(16, SDValue());
10822
+ for (unsigned I = 0; I < 16; I++) {
10823
+ if (ShuffleMask[I] < 16)
10824
+ TBLMaskParts[I] = Mask1->getOperand(ShuffleMask[I]);
10825
+ else {
10826
+ auto *C =
10827
+ dyn_cast<ConstantSDNode>(Mask2->getOperand(ShuffleMask[I] - 16));
10828
+ if (!C)
10829
+ return SDValue();
10830
+ TBLMaskParts[I] = DAG.getConstant(C->getSExtValue() + 32, dl, MVT::i32);
10831
+ }
10832
+ }
10833
+
10834
+ SDValue TBLMask = DAG.getBuildVector(VT, dl, TBLMaskParts);
10835
+ SDValue ID =
10836
+ DAG.getTargetConstant(Intrinsic::aarch64_neon_tbl4, dl, MVT::i64);
10837
+
10838
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v16i8,
10839
+ {ID, Tbl1->getOperand(1), Tbl1->getOperand(2),
10840
+ Tbl2->getOperand(1), Tbl2->getOperand(2), TBLMask});
10841
+ }
10842
+
10798
10843
SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
10799
10844
SelectionDAG &DAG) const {
10800
10845
SDLoc dl(Op);
@@ -10818,6 +10863,9 @@ SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
10818
10863
assert(ShuffleMask.size() == VT.getVectorNumElements() &&
10819
10864
"Unexpected VECTOR_SHUFFLE mask size!");
10820
10865
10866
+ if (SDValue Res = tryToConvertShuffleOfTbl2ToTbl4(Op, ShuffleMask, DAG))
10867
+ return Res;
10868
+
10821
10869
if (SVN->isSplat()) {
10822
10870
int Lane = SVN->getSplatIndex();
10823
10871
// If this is undef splat, generate it via "just" vdup, if possible.
0 commit comments