Skip to content

Commit 15ede92

Browse files
committed
[X86] Use GFNI for vXi8 shifts/rotates
As detailed here: https://github.com/InstLatx64/InstLatX64_Demo/blob/master/GFNI_Demo.h We can use the gf2p8affine instruction to lower byte shifts/rotates as well as the existing bitreverse case. There's a few other GFNI patterns we can probably handle - e.g. TZCNT/LZCNT were detailed on PR47394 Based off the original patch here: https://reviews.llvm.org/D137026
1 parent c02ed29 commit 15ede92

23 files changed

+476
-706
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 49 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28964,6 +28964,33 @@ SDValue X86TargetLowering::LowerWin64_INT128_TO_FP(SDValue Op,
2896428964
return IsStrict ? DAG.getMergeValues({Result, Chain}, dl) : Result;
2896528965
}
2896628966

28967+
// Generate a GFNI gf2p8affine bitmask for vXi8 bitreverse/shift/rotate.
28968+
uint64_t getGFNICtrlImm(unsigned Opcode, unsigned Amt = 0) {
28969+
switch (Opcode) {
28970+
case ISD::BITREVERSE:
28971+
return 0x8040201008040201ULL;
28972+
case ISD::SHL:
28973+
assert((0 < Amt && Amt < 8) && "Shift amount out of range");
28974+
return ((0x0102040810204080ULL >> (Amt)) &
28975+
(0x0101010101010101ULL * (0xFF >> (Amt))));
28976+
case ISD::SRL:
28977+
assert((0 < Amt && Amt < 8) && "Shift amount out of range");
28978+
return ((0x0102040810204080ULL << (Amt)) &
28979+
(0x0101010101010101ULL * ((0xFF << (Amt)) & 0xFF)));
28980+
case ISD::SRA:
28981+
assert((0 < Amt && Amt < 8) && "Shift amount out of range");
28982+
return (getGFNICtrlImm(ISD::SRL, Amt) |
28983+
(0x8080808080808080ULL >> (64 - (8 * Amt))));
28984+
case ISD::ROTL:
28985+
assert((0 < Amt && Amt < 8) && "Rotate amount out of range");
28986+
return getGFNICtrlImm(ISD::SRL, 8 - Amt) | getGFNICtrlImm(ISD::SHL, Amt);
28987+
case ISD::ROTR:
28988+
assert((0 < Amt && Amt < 8) && "Rotate amount out of range");
28989+
return getGFNICtrlImm(ISD::SHL, 8 - Amt) | getGFNICtrlImm(ISD::SRL, Amt);
28990+
}
28991+
llvm_unreachable("Unsupported GFNI opcode");
28992+
}
28993+
2896728994
// Return true if the required (according to Opcode) shift-imm form is natively
2896828995
// supported by the Subtarget
2896928996
static bool supportedVectorShiftWithImm(EVT VT, const X86Subtarget &Subtarget,
@@ -29151,6 +29178,14 @@ static SDValue LowerShiftByScalarImmediate(SDValue Op, SelectionDAG &DAG,
2915129178
if (VT == MVT::v16i8 && Subtarget.hasXOP())
2915229179
return SDValue();
2915329180

29181+
if (Subtarget.hasGFNI()) {
29182+
uint64_t ShiftMask = getGFNICtrlImm(Op.getOpcode(), ShiftAmt);
29183+
MVT MaskVT = MVT::getVectorVT(MVT::i64, NumElts / 8);
29184+
SDValue Mask = DAG.getBitcast(VT, DAG.getConstant(ShiftMask, dl, MaskVT));
29185+
return DAG.getNode(X86ISD::GF2P8AFFINEQB, dl, VT, R, Mask,
29186+
DAG.getTargetConstant(0, dl, MVT::i8));
29187+
}
29188+
2915429189
if (Op.getOpcode() == ISD::SHL) {
2915529190
// Make a large shift.
2915629191
SDValue SHL = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, ShiftVT, R,
@@ -30022,6 +30057,18 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
3002230057
DAG.getNode(ISD::SUB, DL, VT, Z, Amt));
3002330058
}
3002430059

30060+
// Attempt to use GFNI gf2p8affine to rotate vXi8 by an uniform constant.
30061+
if (IsCstSplat && Subtarget.hasGFNI() &&
30062+
(VT == MVT::v16i8 || (VT == MVT::v32i8 && Subtarget.hasAVX()) ||
30063+
(VT == MVT::v64i8 && Subtarget.useBWIRegs()))) {
30064+
uint64_t RotAmt = CstSplatValue.urem(EltSizeInBits);
30065+
uint64_t RotMask = getGFNICtrlImm(Opcode, RotAmt);
30066+
MVT MaskVT = MVT::getVectorVT(MVT::i64, VT.getSizeInBits() / 64);
30067+
SDValue Mask = DAG.getBitcast(VT, DAG.getConstant(RotMask, DL, MaskVT));
30068+
return DAG.getNode(X86ISD::GF2P8AFFINEQB, DL, VT, R, Mask,
30069+
DAG.getTargetConstant(0, DL, MVT::i8));
30070+
}
30071+
3002530072
// Split 256-bit integers on XOP/pre-AVX2 targets.
3002630073
if (VT.is256BitVector() && (Subtarget.hasXOP() || !Subtarget.hasAVX2()))
3002730074
return splitVectorIntBinary(Op, DAG, DL);
@@ -31345,7 +31392,8 @@ static SDValue LowerBITREVERSE(SDValue Op, const X86Subtarget &Subtarget,
3134531392
// If we have GFNI, we can use GF2P8AFFINEQB to reverse the bits.
3134631393
if (Subtarget.hasGFNI()) {
3134731394
MVT MatrixVT = MVT::getVectorVT(MVT::i64, NumElts / 8);
31348-
SDValue Matrix = DAG.getConstant(0x8040201008040201ULL, DL, MatrixVT);
31395+
SDValue Matrix =
31396+
DAG.getConstant(getGFNICtrlImm(ISD::BITREVERSE), DL, MatrixVT);
3134931397
Matrix = DAG.getBitcast(VT, Matrix);
3135031398
return DAG.getNode(X86ISD::GF2P8AFFINEQB, DL, VT, In, Matrix,
3135131399
DAG.getTargetConstant(0, DL, MVT::i8));

llvm/lib/Target/X86/X86TargetTransformInfo.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -345,6 +345,24 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
345345
Op1Info.getNoProps(), Op2Info.getNoProps());
346346
}
347347

348+
static const CostKindTblEntry GFNIUniformConstCostTable[] = {
349+
{ ISD::SHL, MVT::v16i8, { 1, 6, 1, 2 } }, // gf2p8affineqb
350+
{ ISD::SRL, MVT::v16i8, { 1, 6, 1, 2 } }, // gf2p8affineqb
351+
{ ISD::SRA, MVT::v16i8, { 1, 6, 1, 2 } }, // gf2p8affineqb
352+
{ ISD::SHL, MVT::v32i8, { 1, 6, 1, 2 } }, // gf2p8affineqb
353+
{ ISD::SRL, MVT::v32i8, { 1, 6, 1, 2 } }, // gf2p8affineqb
354+
{ ISD::SRA, MVT::v32i8, { 1, 6, 1, 2 } }, // gf2p8affineqb
355+
{ ISD::SHL, MVT::v64i8, { 1, 6, 1, 2 } }, // gf2p8affineqb
356+
{ ISD::SRL, MVT::v64i8, { 1, 6, 1, 2 } }, // gf2p8affineqb
357+
{ ISD::SRA, MVT::v64i8, { 1, 6, 1, 2 } }, // gf2p8affineqb
358+
};
359+
360+
if (Op2Info.isUniform() && Op2Info.isConstant() && ST->hasGFNI())
361+
if (const auto *Entry =
362+
CostTableLookup(GFNIUniformConstCostTable, ISD, LT.second))
363+
if (auto KindCost = Entry->Cost[CostKind])
364+
return LT.first * *KindCost;
365+
348366
static const CostKindTblEntry AVX512BWUniformConstCostTable[] = {
349367
{ ISD::SHL, MVT::v16i8, { 1, 7, 2, 3 } }, // psllw + pand.
350368
{ ISD::SRL, MVT::v16i8, { 1, 7, 2, 3 } }, // psrlw + pand.
@@ -3860,6 +3878,9 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
38603878
{ ISD::BITREVERSE, MVT::v2i64, { 1, 8, 2, 4 } }, // gf2p8affineqb
38613879
{ ISD::BITREVERSE, MVT::v4i64, { 1, 9, 2, 4 } }, // gf2p8affineqb
38623880
{ ISD::BITREVERSE, MVT::v8i64, { 1, 9, 2, 4 } }, // gf2p8affineqb
3881+
{ X86ISD::VROTLI, MVT::v16i8, { 1, 6, 1, 2 } }, // gf2p8affineqb
3882+
{ X86ISD::VROTLI, MVT::v32i8, { 1, 6, 1, 2 } }, // gf2p8affineqb
3883+
{ X86ISD::VROTLI, MVT::v64i8, { 1, 6, 1, 2 } }, // gf2p8affineqb
38633884
};
38643885
static const CostKindTblEntry GLMCostTbl[] = {
38653886
{ ISD::FSQRT, MVT::f32, { 19, 20, 1, 1 } }, // sqrtss

llvm/test/Analysis/CostModel/X86/fshl-codesize.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1597,9 +1597,9 @@ define void @splatconstant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <
15971597
;
15981598
; AVX512GFNI-LABEL: 'splatconstant_funnel_i8'
15991599
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3)
1600-
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
1601-
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
1602-
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
1600+
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
1601+
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
1602+
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
16031603
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
16041604
;
16051605
%I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3)
@@ -2871,9 +2871,9 @@ define void @splatconstant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <
28712871
;
28722872
; AVX512GFNI-LABEL: 'splatconstant_rotate_i8'
28732873
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3)
2874-
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
2875-
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
2876-
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
2874+
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
2875+
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
2876+
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
28772877
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
28782878
;
28792879
%I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3)

llvm/test/Analysis/CostModel/X86/fshl-latency.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1549,9 +1549,9 @@ define void @splatconstant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <
15491549
;
15501550
; AVX512GFNI-LABEL: 'splatconstant_funnel_i8'
15511551
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3)
1552-
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
1553-
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
1554-
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
1552+
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
1553+
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
1554+
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
15551555
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
15561556
;
15571557
%I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3)
@@ -2823,9 +2823,9 @@ define void @splatconstant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <
28232823
;
28242824
; AVX512GFNI-LABEL: 'splatconstant_rotate_i8'
28252825
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3)
2826-
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
2827-
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
2828-
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
2826+
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
2827+
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
2828+
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
28292829
; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
28302830
;
28312831
%I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3)

0 commit comments

Comments
 (0)