@@ -1241,11 +1241,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
1241
1241
setOperationAction(ISD::ABS, MVT::v16i8, Legal);
1242
1242
setOperationAction(ISD::ABS, MVT::v8i16, Legal);
1243
1243
setOperationAction(ISD::ABS, MVT::v4i32, Legal);
1244
- setOperationAction(ISD::BITREVERSE, MVT::v16i8, Custom);
1245
- setOperationAction(ISD::CTLZ, MVT::v16i8, Custom);
1246
- setOperationAction(ISD::CTLZ , MVT::v8i16 , Custom);
1247
- setOperationAction(ISD::CTLZ, MVT::v4i32 , Custom);
1248
- setOperationAction(ISD::CTLZ, MVT::v2i64, Custom);
1244
+
1245
+ for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
1246
+ setOperationAction(ISD::BITREVERSE , VT , Custom);
1247
+ setOperationAction(ISD::CTLZ, VT , Custom);
1248
+ }
1249
1249
1250
1250
// These might be better off as horizontal vector ops.
1251
1251
setOperationAction(ISD::ADD, MVT::i16, Custom);
@@ -1341,10 +1341,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
1341
1341
// XOP can efficiently perform BITREVERSE with VPPERM.
1342
1342
for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 })
1343
1343
setOperationAction(ISD::BITREVERSE, VT, Custom);
1344
-
1345
- for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1346
- MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
1347
- setOperationAction(ISD::BITREVERSE, VT, Custom);
1348
1344
}
1349
1345
1350
1346
if (!Subtarget.useSoftFloat() && Subtarget.hasAVX()) {
@@ -1461,12 +1457,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
1461
1457
setOperationAction(ISD::TRUNCATE, MVT::v32i32, Custom);
1462
1458
setOperationAction(ISD::TRUNCATE, MVT::v32i64, Custom);
1463
1459
1464
- setOperationAction(ISD::BITREVERSE, MVT::v32i8, Custom);
1465
-
1466
1460
for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1467
1461
setOperationAction(ISD::SETCC, VT, Custom);
1468
1462
setOperationAction(ISD::CTPOP, VT, Custom);
1469
1463
setOperationAction(ISD::CTLZ, VT, Custom);
1464
+ setOperationAction(ISD::BITREVERSE, VT, Custom);
1470
1465
1471
1466
// The condition codes aren't legal in SSE/AVX and under AVX512 we use
1472
1467
// setcc all the way to isel and prefer SETGT in some isel patterns.
@@ -1841,8 +1836,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
1841
1836
setOperationAction(ISD::SMULO, MVT::v64i8, Custom);
1842
1837
setOperationAction(ISD::UMULO, MVT::v64i8, Custom);
1843
1838
1844
- setOperationAction(ISD::BITREVERSE, MVT::v64i8, Custom);
1845
-
1846
1839
for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64 }) {
1847
1840
setOperationAction(ISD::SRL, VT, Custom);
1848
1841
setOperationAction(ISD::SHL, VT, Custom);
@@ -1852,6 +1845,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
1852
1845
setOperationAction(ISD::SETCC, VT, Custom);
1853
1846
setOperationAction(ISD::ABDS, VT, Custom);
1854
1847
setOperationAction(ISD::ABDU, VT, Custom);
1848
+ setOperationAction(ISD::BITREVERSE, VT, Custom);
1855
1849
1856
1850
// The condition codes aren't legal in SSE/AVX and under AVX512 we use
1857
1851
// setcc all the way to isel and prefer SETGT in some isel patterns.
@@ -31180,17 +31174,25 @@ static SDValue LowerBITREVERSE(SDValue Op, const X86Subtarget &Subtarget,
31180
31174
SDValue In = Op.getOperand(0);
31181
31175
SDLoc DL(Op);
31182
31176
31183
- assert(VT.getScalarType() == MVT::i8 &&
31184
- "Only byte vector BITREVERSE supported");
31185
-
31186
- // Split v64i8 without BWI so that we can still use the PSHUFB lowering.
31187
- if (VT == MVT::v64i8 && !Subtarget.hasBWI())
31177
+ // Split 512-bit ops without BWI so that we can still use the PSHUFB lowering.
31178
+ if (VT.is512BitVector() && !Subtarget.hasBWI())
31188
31179
return splitVectorIntUnary(Op, DAG);
31189
31180
31190
31181
// Decompose 256-bit ops into smaller 128-bit ops on pre-AVX2.
31191
- if (VT == MVT::v32i8 && !Subtarget.hasInt256())
31182
+ if (VT.is256BitVector() && !Subtarget.hasInt256())
31192
31183
return splitVectorIntUnary(Op, DAG);
31193
31184
31185
+ // Lower vXi16/vXi32/vXi64 as BSWAP + vXi8 BITREVERSE.
31186
+ if (VT.getScalarType() != MVT::i8) {
31187
+ MVT ByteVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8);
31188
+ SDValue Res = DAG.getNode(ISD::BSWAP, DL, VT, In);
31189
+ Res = DAG.getBitcast(ByteVT, Res);
31190
+ Res = DAG.getNode(ISD::BITREVERSE, DL, ByteVT, Res);
31191
+ return DAG.getBitcast(VT, Res);
31192
+ }
31193
+ assert(VT.isVector() && VT.getScalarType() == MVT::i8 &&
31194
+ "Only byte vector BITREVERSE supported");
31195
+
31194
31196
unsigned NumElts = VT.getVectorNumElements();
31195
31197
31196
31198
// If we have GFNI, we can use GF2P8AFFINEQB to reverse the bits.
0 commit comments