Skip to content

Commit 4c68de5

Browse files
authored
[RISCV][CostModel] Add cost model for experimental.cttz.elts (#91778)
The cost of `experimental.cttz.elts` in RISC-V equals to the cost of vfirst when the zero_is_poison argument is true. Otherwise, we add additional costs of cmp + select to convert the -1 result from vfirst to EVL.
1 parent 0f17d9a commit 4c68de5

File tree

3 files changed

+47
-26
lines changed

3 files changed

+47
-26
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -567,6 +567,8 @@ class RISCVTargetLowering : public TargetLowering {
567567
shouldExpandBuildVectorWithShuffles(EVT VT,
568568
unsigned DefinedValues) const override;
569569

570+
bool shouldExpandCttzElements(EVT VT) const override;
571+
570572
/// Return the cost of LMUL for linear operations.
571573
InstructionCost getLMULCost(MVT VT) const;
572574

@@ -1001,8 +1003,6 @@ class RISCVTargetLowering : public TargetLowering {
10011003
bool shouldExpandGetVectorLength(EVT TripCountVT, unsigned VF,
10021004
bool IsScalable) const override;
10031005

1004-
bool shouldExpandCttzElements(EVT VT) const override;
1005-
10061006
/// RVV code generation for fixed length vectors does not lower all
10071007
/// BUILD_VECTORs. This makes BUILD_VECTOR legalisation a source of stores to
10081008
/// merge. However, merging them creates a BUILD_VECTOR that is just as

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,7 @@ RISCVTTIImpl::getRISCVInstructionCost(ArrayRef<unsigned> OpCodes, MVT VT,
9797
case RISCV::VMANDN_MM:
9898
case RISCV::VMNAND_MM:
9999
case RISCV::VCPOP_M:
100+
case RISCV::VFIRST_M:
100101
Cost += 1;
101102
break;
102103
default:
@@ -901,6 +902,26 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
901902
getRISCVInstructionCost(RISCV::VADD_VX, LT.second, CostKind);
902903
return 1 + (LT.first - 1);
903904
}
905+
case Intrinsic::experimental_cttz_elts: {
906+
Type *ArgTy = ICA.getArgTypes()[0];
907+
EVT ArgType = TLI->getValueType(DL, ArgTy, true);
908+
if (getTLI()->shouldExpandCttzElements(ArgType))
909+
break;
910+
InstructionCost Cost = getRISCVInstructionCost(
911+
RISCV::VFIRST_M, getTypeLegalizationCost(ArgTy).second, CostKind);
912+
913+
// If zero_is_poison is false, then we will generate additional
914+
// cmp + select instructions to convert -1 to EVL.
915+
Type *BoolTy = Type::getInt1Ty(RetTy->getContext());
916+
if (ICA.getArgs().size() > 1 &&
917+
cast<ConstantInt>(ICA.getArgs()[1])->isZero())
918+
Cost += getCmpSelInstrCost(Instruction::ICmp, BoolTy, RetTy,
919+
CmpInst::ICMP_SLT, CostKind) +
920+
getCmpSelInstrCost(Instruction::Select, RetTy, BoolTy,
921+
CmpInst::BAD_ICMP_PREDICATE, CostKind);
922+
923+
return Cost;
924+
}
904925
case Intrinsic::vp_rint: {
905926
// RISC-V target uses at least 5 instructions to lower rounding intrinsics.
906927
unsigned Cost = 5;

llvm/test/Analysis/CostModel/RISCV/cttz_elts.ll

Lines changed: 24 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -17,19 +17,19 @@ define void @foo_no_vscale_range() {
1717
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv32i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv32i1(<vscale x 32 x i1> undef, i1 true)
1818
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv64i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv64i1(<vscale x 64 x i1> undef, i1 true)
1919
; CHECK-NEXT: Cost Model: Found an estimated cost of 390 for instruction: %res.i32.nxv128i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv128i1(<vscale x 128 x i1> undef, i1 true)
20-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv2i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv2i1(<vscale x 2 x i1> undef, i1 false)
21-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv4i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1(<vscale x 4 x i1> undef, i1 false)
22-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv8i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv8i1(<vscale x 8 x i1> undef, i1 false)
23-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv16i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1(<vscale x 16 x i1> undef, i1 false)
24-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv32i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv32i1(<vscale x 32 x i1> undef, i1 false)
25-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv64i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv64i1(<vscale x 64 x i1> undef, i1 false)
20+
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res.i64.nxv2i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv2i1(<vscale x 2 x i1> undef, i1 false)
21+
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res.i64.nxv4i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1(<vscale x 4 x i1> undef, i1 false)
22+
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res.i64.nxv8i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv8i1(<vscale x 8 x i1> undef, i1 false)
23+
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res.i64.nxv16i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1(<vscale x 16 x i1> undef, i1 false)
24+
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res.i64.nxv32i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv32i1(<vscale x 32 x i1> undef, i1 false)
25+
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res.i64.nxv64i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv64i1(<vscale x 64 x i1> undef, i1 false)
2626
; CHECK-NEXT: Cost Model: Found an estimated cost of 781 for instruction: %res.i64.nxv128i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv128i1(<vscale x 128 x i1> undef, i1 false)
27-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv2i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv2i1(<vscale x 2 x i1> undef, i1 false)
28-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv4i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv4i1(<vscale x 4 x i1> undef, i1 false)
29-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv8i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv8i1(<vscale x 8 x i1> undef, i1 false)
30-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv16i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> undef, i1 false)
31-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv32i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv32i1(<vscale x 32 x i1> undef, i1 false)
32-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv64i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv64i1(<vscale x 64 x i1> undef, i1 false)
27+
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res.i32.nxv2i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv2i1(<vscale x 2 x i1> undef, i1 false)
28+
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res.i32.nxv4i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv4i1(<vscale x 4 x i1> undef, i1 false)
29+
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res.i32.nxv8i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv8i1(<vscale x 8 x i1> undef, i1 false)
30+
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res.i32.nxv16i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> undef, i1 false)
31+
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res.i32.nxv32i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv32i1(<vscale x 32 x i1> undef, i1 false)
32+
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res.i32.nxv64i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv64i1(<vscale x 64 x i1> undef, i1 false)
3333
; CHECK-NEXT: Cost Model: Found an estimated cost of 390 for instruction: %res.i32.nxv128i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv128i1(<vscale x 128 x i1> undef, i1 false)
3434
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
3535
;
@@ -83,19 +83,19 @@ define void @foo_vscale_range_2_16() vscale_range(2,16) {
8383
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv32i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv32i1(<vscale x 32 x i1> undef, i1 true)
8484
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv64i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv64i1(<vscale x 64 x i1> undef, i1 true)
8585
; CHECK-NEXT: Cost Model: Found an estimated cost of 195 for instruction: %res.i32.nxv128i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv128i1(<vscale x 128 x i1> undef, i1 true)
86-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv2i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv2i1(<vscale x 2 x i1> undef, i1 false)
87-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv4i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1(<vscale x 4 x i1> undef, i1 false)
88-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv8i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv8i1(<vscale x 8 x i1> undef, i1 false)
89-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv16i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1(<vscale x 16 x i1> undef, i1 false)
90-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv32i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv32i1(<vscale x 32 x i1> undef, i1 false)
91-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv64i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv64i1(<vscale x 64 x i1> undef, i1 false)
86+
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res.i64.nxv2i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv2i1(<vscale x 2 x i1> undef, i1 false)
87+
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res.i64.nxv4i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1(<vscale x 4 x i1> undef, i1 false)
88+
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res.i64.nxv8i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv8i1(<vscale x 8 x i1> undef, i1 false)
89+
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res.i64.nxv16i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1(<vscale x 16 x i1> undef, i1 false)
90+
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res.i64.nxv32i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv32i1(<vscale x 32 x i1> undef, i1 false)
91+
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res.i64.nxv64i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv64i1(<vscale x 64 x i1> undef, i1 false)
9292
; CHECK-NEXT: Cost Model: Found an estimated cost of 195 for instruction: %res.i64.nxv128i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv128i1(<vscale x 128 x i1> undef, i1 false)
93-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv2i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv2i1(<vscale x 2 x i1> undef, i1 false)
94-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv4i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv4i1(<vscale x 4 x i1> undef, i1 false)
95-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv8i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv8i1(<vscale x 8 x i1> undef, i1 false)
96-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv16i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> undef, i1 false)
97-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv32i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv32i1(<vscale x 32 x i1> undef, i1 false)
98-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv64i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv64i1(<vscale x 64 x i1> undef, i1 false)
93+
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res.i32.nxv2i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv2i1(<vscale x 2 x i1> undef, i1 false)
94+
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res.i32.nxv4i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv4i1(<vscale x 4 x i1> undef, i1 false)
95+
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res.i32.nxv8i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv8i1(<vscale x 8 x i1> undef, i1 false)
96+
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res.i32.nxv16i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> undef, i1 false)
97+
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res.i32.nxv32i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv32i1(<vscale x 32 x i1> undef, i1 false)
98+
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res.i32.nxv64i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv64i1(<vscale x 64 x i1> undef, i1 false)
9999
; CHECK-NEXT: Cost Model: Found an estimated cost of 195 for instruction: %res.i32.nxv128i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv128i1(<vscale x 128 x i1> undef, i1 false)
100100
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
101101
;

0 commit comments

Comments
 (0)