@@ -3820,6 +3820,24 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
3820
3820
{ ISD::FSQRT, MVT::v2f64, { 27 , 27 , 1 , 1 } }, // vsqrtpd
3821
3821
{ ISD::FSQRT, MVT::v4f64, { 54 , 54 , 1 , 3 } }, // vsqrtpd
3822
3822
};
3823
+ static const CostKindTblEntry GFNICostTbl[] = { // BITREVERSE costs consulted when the subtarget has GFNI; each entry's cost array is indexed by CostKind at lookup.
3824
+ { ISD::BITREVERSE, MVT::i8 , { 3 , 3 , 3 , 4 } }, // gf2p8affineqb
3825
+ { ISD::BITREVERSE, MVT::i16 , { 3 , 3 , 4 , 6 } }, // gf2p8affineqb
3826
+ { ISD::BITREVERSE, MVT::i32 , { 3 , 3 , 4 , 5 } }, // gf2p8affineqb
3827
+ { ISD::BITREVERSE, MVT::i64 , { 3 , 3 , 4 , 6 } }, // gf2p8affineqb
3828
+ { ISD::BITREVERSE, MVT::v16i8, { 1 , 6 , 1 , 2 } }, // gf2p8affineqb
3829
+ { ISD::BITREVERSE, MVT::v32i8, { 1 , 6 , 1 , 2 } }, // gf2p8affineqb
3830
+ { ISD::BITREVERSE, MVT::v64i8, { 1 , 6 , 1 , 2 } }, // gf2p8affineqb
3831
+ { ISD::BITREVERSE, MVT::v8i16, { 1 , 8 , 2 , 4 } }, // gf2p8affineqb + pshufb for non-i8 elements (presumably - TODO confirm)
3832
+ { ISD::BITREVERSE, MVT::v16i16, { 1 , 9 , 2 , 4 } }, // gf2p8affineqb + pshufb for non-i8 elements (presumably - TODO confirm)
3833
+ { ISD::BITREVERSE, MVT::v32i16, { 1 , 9 , 2 , 4 } }, // gf2p8affineqb + pshufb for non-i8 elements (presumably - TODO confirm)
3834
+ { ISD::BITREVERSE, MVT::v4i32, { 1 , 8 , 2 , 4 } }, // gf2p8affineqb + pshufb for non-i8 elements (presumably - TODO confirm)
3835
+ { ISD::BITREVERSE, MVT::v8i32, { 1 , 9 , 2 , 4 } }, // gf2p8affineqb + pshufb for non-i8 elements (presumably - TODO confirm)
3836
+ { ISD::BITREVERSE, MVT::v16i32, { 1 , 9 , 2 , 4 } }, // gf2p8affineqb + pshufb for non-i8 elements (presumably - TODO confirm)
3837
+ { ISD::BITREVERSE, MVT::v2i64, { 1 , 8 , 2 , 4 } }, // gf2p8affineqb + pshufb for non-i8 elements (presumably - TODO confirm)
3838
+ { ISD::BITREVERSE, MVT::v4i64, { 1 , 9 , 2 , 4 } }, // gf2p8affineqb + pshufb for non-i8 elements (presumably - TODO confirm)
3839
+ { ISD::BITREVERSE, MVT::v8i64, { 1 , 9 , 2 , 4 } }, // gf2p8affineqb + pshufb for non-i8 elements (presumably - TODO confirm)
3840
+ };
3823
3841
static const CostKindTblEntry GLMCostTbl[] = {
3824
3842
{ ISD::FSQRT, MVT::f32 , { 19 , 20 , 1 , 1 } }, // sqrtss
3825
3843
{ ISD::FSQRT, MVT::v4f32, { 37 , 41 , 1 , 5 } }, // sqrtps
@@ -4156,23 +4174,6 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
4156
4174
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost (OpTy);
4157
4175
MVT MTy = LT.second ;
4158
4176
4159
- // Attempt to lookup cost.
4160
- if (ISD == ISD::BITREVERSE && ST->hasGFNI () && ST->hasSSSE3 () &&
4161
- MTy.isVector ()) {
4162
- // With PSHUFB the code is very similar for all types. If we have integer
4163
- // byte operations, we just need a GF2P8AFFINEQB for vXi8. For other types
4164
- // we also need a PSHUFB.
4165
- unsigned Cost = MTy.getVectorElementType () == MVT::i8 ? 1 : 2 ;
4166
-
4167
- // Without byte operations, we need twice as many GF2P8AFFINEQB and PSHUFB
4168
- // instructions. We also need an extract and an insert.
4169
- if (!(MTy.is128BitVector () || (ST->hasAVX2 () && MTy.is256BitVector ()) ||
4170
- (ST->hasBWI () && MTy.is512BitVector ())))
4171
- Cost = Cost * 2 + 2 ;
4172
-
4173
- return LT.first * Cost;
4174
- }
4175
-
4176
4177
// Without BMI/LZCNT see if we're only looking for a *_ZERO_UNDEF cost.
4177
4178
if (((ISD == ISD::CTTZ && !ST->hasBMI ()) ||
4178
4179
(ISD == ISD::CTLZ && !ST->hasLZCNT ())) &&
@@ -4230,6 +4231,12 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
4230
4231
return adjustTableCost (Entry->ISD , *KindCost, LT.first ,
4231
4232
ICA.getFlags ());
4232
4233
4234
+ if (ST->hasGFNI ())
4235
+ if (const auto *Entry = CostTableLookup (GFNICostTbl, ISD, MTy))
4236
+ if (auto KindCost = Entry->Cost [CostKind])
4237
+ return adjustTableCost (Entry->ISD , *KindCost, LT.first ,
4238
+ ICA.getFlags ());
4239
+
4233
4240
if (ST->hasCDI ())
4234
4241
if (const auto *Entry = CostTableLookup (AVX512CDCostTbl, ISD, MTy))
4235
4242
if (auto KindCost = Entry->Cost [CostKind])
0 commit comments