Skip to content

Commit 9bcb9c2

Browse files
author
shami
committed
Perform bitreverse using AVX512 GFNI for i32 and i64.
1 parent 313a33b commit 9bcb9c2

File tree

3 files changed

+209
-274
lines changed

3 files changed

+209
-274
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 49 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -31312,17 +31312,63 @@ static SDValue LowerBITREVERSE_XOP(SDValue Op, SelectionDAG &DAG) {
3131231312
return DAG.getBitcast(VT, Res);
3131331313
}
3131431314

31315+
/// Build a byte-granular shuffle mask that reverses the order of the bytes
/// inside each element of the vector type \p VT, leaving the elements
/// themselves in their original positions.
///
/// For element index E with S bytes per element, the mask receives the byte
/// indices E*S + (S-1), E*S + (S-2), ..., E*S in that order. The result holds
/// one entry per byte of the vector (number of elements times S).
///
/// \param VT vector type whose element count and scalar width drive the mask.
/// \returns the mask as a SmallVector<int, 16>.
static auto createBSWAPShuffleMask(EVT VT) {
  const int BytesPerElt = VT.getScalarSizeInBits() / 8;
  const int NumElts = VT.getVectorNumElements();

  SmallVector<int, 16> Mask;
  for (int Elt = 0; Elt < NumElts; ++Elt) {
    // Index of the lowest-numbered byte belonging to this element.
    const int FirstByte = Elt * BytesPerElt;
    // Walk the element's bytes from the highest index down to the lowest.
    for (int Byte = BytesPerElt; Byte-- > 0;)
      Mask.push_back(FirstByte + Byte);
  }
  return Mask;
}
31324+
3131531325
static SDValue LowerBITREVERSE(SDValue Op, const X86Subtarget &Subtarget,
3131631326
SelectionDAG &DAG) {
3131731327
MVT VT = Op.getSimpleValueType();
31328+
SDValue In = Op.getOperand(0);
31329+
SDLoc DL(Op);
31330+
31331+
auto HasGFNI = Subtarget.hasGFNI();
31332+
auto ScalarType = VT.getScalarType();
31333+
31334+
if (HasGFNI && ((ScalarType == MVT::i32) || (ScalarType == MVT::i64))) {
31335+
if (VT.isVector()) {
31336+
SmallVector<int, 16> BSWAPMask = createBSWAPShuffleMask(VT);
31337+
EVT ByteVT =
31338+
EVT::getVectorVT(*DAG.getContext(), MVT::i8, BSWAPMask.size());
31339+
SDValue VecShuffle = DAG.getVectorShuffle(
31340+
ByteVT, DL, DAG.getNode(ISD::BITCAST, DL, ByteVT, In),
31341+
DAG.getUNDEF(ByteVT), BSWAPMask);
31342+
SDValue BitReverse = DAG.getNode(ISD::BITREVERSE, DL, ByteVT, VecShuffle);
31343+
return DAG.getBitcast(VT, BitReverse);
31344+
} else {
31345+
auto CastTo = ScalarType == MVT::i32 ? MVT::v4i32 : MVT::v2i64;
31346+
SDValue ScalarToVector =
31347+
DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, CastTo, In);
31348+
SDValue BitReverse =
31349+
DAG.getNode(ISD::BITREVERSE, DL, MVT::v16i8,
31350+
DAG.getBitcast(MVT::v16i8, ScalarToVector));
31351+
SDValue ExtractElementZero = DAG.getNode(
31352+
ISD::EXTRACT_VECTOR_ELT, DL, ScalarType,
31353+
DAG.getBitcast(CastTo, BitReverse), DAG.getIntPtrConstant(0, DL));
31354+
return DAG.getNode(ISD::BSWAP, DL, ScalarType, ExtractElementZero);
31355+
}
31356+
}
3131831357

3131931358
if (Subtarget.hasXOP() && !VT.is512BitVector())
3132031359
return LowerBITREVERSE_XOP(Op, DAG);
3132131360

3132231361
assert(Subtarget.hasSSSE3() && "SSSE3 required for BITREVERSE");
3132331362

31324-
SDValue In = Op.getOperand(0);
31325-
SDLoc DL(Op);
31363+
assert(VT.getScalarType() == MVT::i8 &&
31364+
"Only byte vector BITREVERSE supported");
31365+
31366+
// Split v64i8 without BWI so that we can still use the PSHUFB lowering.
31367+
31368+
if (Subtarget.hasXOP() && !VT.is512BitVector())
31369+
return LowerBITREVERSE_XOP(Op, DAG);
31370+
31371+
assert(Subtarget.hasSSSE3() && "SSSE3 required for BITREVERSE");
3132631372

3132731373
// Split 512-bit ops without BWI so that we can still use the PSHUFB lowering.
3132831374
if (VT.is512BitVector() && !Subtarget.hasBWI())
@@ -31346,7 +31392,7 @@ static SDValue LowerBITREVERSE(SDValue Op, const X86Subtarget &Subtarget,
3134631392
unsigned NumElts = VT.getVectorNumElements();
3134731393

3134831394
// If we have GFNI, we can use GF2P8AFFINEQB to reverse the bits.
31349-
if (Subtarget.hasGFNI()) {
31395+
if (HasGFNI) {
3135031396
MVT MatrixVT = MVT::getVectorVT(MVT::i64, NumElts / 8);
3135131397
SDValue Matrix = DAG.getConstant(0x8040201008040201ULL, DL, MatrixVT);
3135231398
Matrix = DAG.getBitcast(VT, Matrix);

0 commit comments

Comments
 (0)