@@ -10412,6 +10412,51 @@ static SDValue tryWidenMaskForShuffle(SDValue Op, SelectionDAG &DAG) {
10412
10412
return SDValue();
10413
10413
}
10414
10414
10415
+ // Try to fold shuffle (tbl2, tbl2) into a single tbl4.
10416
+ static SDValue tryToConvertShuffleOfTbl2ToTbl4(SDValue Op,
10417
+ ArrayRef<int> ShuffleMask,
10418
+ SelectionDAG &DAG) {
10419
+ SDValue Tbl1 = Op->getOperand(0);
10420
+ SDValue Tbl2 = Op->getOperand(1);
10421
+ SDLoc dl(Op);
10422
+ SDValue Tbl2ID =
10423
+ DAG.getTargetConstant(Intrinsic::aarch64_neon_tbl2, dl, MVT::i64);
10424
+
10425
+ EVT VT = Op.getValueType();
10426
+ if (Tbl1->getOpcode() != ISD::INTRINSIC_WO_CHAIN ||
10427
+ Tbl1->getOperand(0) != Tbl2ID ||
10428
+ Tbl2->getOpcode() != ISD::INTRINSIC_WO_CHAIN ||
10429
+ Tbl2->getOperand(0) != Tbl2ID)
10430
+ return SDValue();
10431
+
10432
+ if (Tbl1->getValueType(0) != MVT::v16i8 ||
10433
+ Tbl2->getValueType(0) != MVT::v16i8)
10434
+ return SDValue();
10435
+
10436
+ SDValue Mask1 = Tbl1->getOperand(3);
10437
+ SDValue Mask2 = Tbl2->getOperand(3);
10438
+ SmallVector<SDValue, 16> TBLMaskParts(16, SDValue());
10439
+ for (unsigned I = 0; I < 16; I++) {
10440
+ if (ShuffleMask[I] < 16)
10441
+ TBLMaskParts[I] = Mask1->getOperand(ShuffleMask[I]);
10442
+ else {
10443
+ auto *C =
10444
+ dyn_cast<ConstantSDNode>(Mask2->getOperand(ShuffleMask[I] - 16));
10445
+ if (!C)
10446
+ return SDValue();
10447
+ TBLMaskParts[I] = DAG.getConstant(C->getSExtValue() + 32, dl, MVT::i32);
10448
+ }
10449
+ }
10450
+
10451
+ SDValue TBLMask = DAG.getBuildVector(VT, dl, TBLMaskParts);
10452
+ SDValue ID =
10453
+ DAG.getTargetConstant(Intrinsic::aarch64_neon_tbl4, dl, MVT::i64);
10454
+
10455
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v16i8,
10456
+ {ID, Tbl1->getOperand(1), Tbl1->getOperand(2),
10457
+ Tbl2->getOperand(1), Tbl2->getOperand(2), TBLMask});
10458
+ }
10459
+
10415
10460
SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
10416
10461
SelectionDAG &DAG) const {
10417
10462
SDLoc dl(Op);
@@ -10435,6 +10480,9 @@ SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
10435
10480
assert(ShuffleMask.size() == VT.getVectorNumElements() &&
10436
10481
"Unexpected VECTOR_SHUFFLE mask size!");
10437
10482
10483
+ if (SDValue Res = tryToConvertShuffleOfTbl2ToTbl4(Op, ShuffleMask, DAG))
10484
+ return Res;
10485
+
10438
10486
if (SVN->isSplat()) {
10439
10487
int Lane = SVN->getSplatIndex();
10440
10488
// If this is undef splat, generate it via "just" vdup, if possible.
0 commit comments