@@ -31312,17 +31312,63 @@ static SDValue LowerBITREVERSE_XOP(SDValue Op, SelectionDAG &DAG) {
31312
31312
return DAG.getBitcast(VT, Res);
31313
31313
}
31314
31314
31315
+ static auto createBSWAPShuffleMask(EVT VT) {
31316
+ SmallVector<int, 16> ShuffleMask;
31317
+ int ScalarSizeInBytes = VT.getScalarSizeInBits() / 8;
31318
+ for (int I = 0, E = VT.getVectorNumElements(); I != E; ++I)
31319
+ for (int J = ScalarSizeInBytes - 1; J >= 0; --J)
31320
+ ShuffleMask.push_back((I * ScalarSizeInBytes) + J);
31321
+
31322
+ return ShuffleMask;
31323
+ }
31324
+
31315
31325
static SDValue LowerBITREVERSE(SDValue Op, const X86Subtarget &Subtarget,
31316
31326
SelectionDAG &DAG) {
31317
31327
MVT VT = Op.getSimpleValueType();
31328
+ SDValue In = Op.getOperand(0);
31329
+ SDLoc DL(Op);
31330
+
31331
+ auto HasGFNI = Subtarget.hasGFNI();
31332
+ auto ScalarType = VT.getScalarType();
31333
+
31334
+ if (HasGFNI && ((ScalarType == MVT::i32) || (ScalarType == MVT::i64))) {
31335
+ if (VT.isVector()) {
31336
+ SmallVector<int, 16> BSWAPMask = createBSWAPShuffleMask(VT);
31337
+ EVT ByteVT =
31338
+ EVT::getVectorVT(*DAG.getContext(), MVT::i8, BSWAPMask.size());
31339
+ SDValue VecShuffle = DAG.getVectorShuffle(
31340
+ ByteVT, DL, DAG.getNode(ISD::BITCAST, DL, ByteVT, In),
31341
+ DAG.getUNDEF(ByteVT), BSWAPMask);
31342
+ SDValue BitReverse = DAG.getNode(ISD::BITREVERSE, DL, ByteVT, VecShuffle);
31343
+ return DAG.getBitcast(VT, BitReverse);
31344
+ } else {
31345
+ auto CastTo = ScalarType == MVT::i32 ? MVT::v4i32 : MVT::v2i64;
31346
+ SDValue ScalarToVector =
31347
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, CastTo, In);
31348
+ SDValue BitReverse =
31349
+ DAG.getNode(ISD::BITREVERSE, DL, MVT::v16i8,
31350
+ DAG.getBitcast(MVT::v16i8, ScalarToVector));
31351
+ SDValue ExtractElementZero = DAG.getNode(
31352
+ ISD::EXTRACT_VECTOR_ELT, DL, ScalarType,
31353
+ DAG.getBitcast(CastTo, BitReverse), DAG.getIntPtrConstant(0, DL));
31354
+ return DAG.getNode(ISD::BSWAP, DL, ScalarType, ExtractElementZero);
31355
+ }
31356
+ }
31318
31357
31319
31358
if (Subtarget.hasXOP() && !VT.is512BitVector())
31320
31359
return LowerBITREVERSE_XOP(Op, DAG);
31321
31360
31322
31361
assert(Subtarget.hasSSSE3() && "SSSE3 required for BITREVERSE");
31323
31362
31324
- SDValue In = Op.getOperand(0);
31325
- SDLoc DL(Op);
31363
+ assert(VT.getScalarType() == MVT::i8 &&
31364
+ "Only byte vector BITREVERSE supported");
31365
+
31366
+ // Split v64i8 without BWI so that we can still use the PSHUFB lowering.
31367
+
31368
+ if (Subtarget.hasXOP() && !VT.is512BitVector())
31369
+ return LowerBITREVERSE_XOP(Op, DAG);
31370
+
31371
+ assert(Subtarget.hasSSSE3() && "SSSE3 required for BITREVERSE");
31326
31372
31327
31373
// Split 512-bit ops without BWI so that we can still use the PSHUFB lowering.
31328
31374
if (VT.is512BitVector() && !Subtarget.hasBWI())
@@ -31346,7 +31392,7 @@ static SDValue LowerBITREVERSE(SDValue Op, const X86Subtarget &Subtarget,
31346
31392
unsigned NumElts = VT.getVectorNumElements();
31347
31393
31348
31394
// If we have GFNI, we can use GF2P8AFFINEQB to reverse the bits.
31349
- if (Subtarget.hasGFNI() ) {
31395
+ if (HasGFNI ) {
31350
31396
MVT MatrixVT = MVT::getVectorVT(MVT::i64, NumElts / 8);
31351
31397
SDValue Matrix = DAG.getConstant(0x8040201008040201ULL, DL, MatrixVT);
31352
31398
Matrix = DAG.getBitcast(VT, Matrix);
0 commit comments