Skip to content

Commit 06f4ca2

Browse files
committed
[RISCV] Fix the cost of llvm.vector.reduce.and
1 parent 95b6524 commit 06f4ca2

File tree

4 files changed

+29
-14
lines changed

4 files changed

+29
-14
lines changed

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1536,15 +1536,30 @@ RISCVTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
15361536
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
15371537
Type *ElementTy = Ty->getElementType();
15381538
if (ElementTy->isIntegerTy(1)) {
1539+
// Example sequences:
1540+
// vfirst.m a0, v0
1541+
// seqz a0, a0
1542+
if (LT.second == MVT::v1i1)
1543+
return getRISCVInstructionCost(RISCV::VFIRST_M, LT.second, CostKind) +
1544+
getCmpSelInstrCost(Instruction::ICmp, ElementTy, ElementTy,
1545+
CmpInst::ICMP_EQ, CostKind);
1546+
15391547
if (ISD == ISD::AND) {
15401548
// Example sequences:
15411549
// vsetvli a0, zero, e8, mf8, ta, ma
15421550
// vmand.mm v8, v9, v8 ; needed every time type is split
15431551
// vmnot.m v8, v0
15441552
// vcpop.m a0, v8
15451553
// seqz a0, a0
1546-
return LT.first * getRISCVInstructionCost(RISCV::VMNAND_MM, LT.second,
1547-
CostKind) +
1554+
1555+
// Fixed VT: In v512i1 and larger vector elements,
1556+
// Scalable VT: In v128i1 and larger vector elements,
1557+
// the VMAND_MM instructions have started to be added.
1558+
return ((LT.first >= 2)
1559+
? LT.first - (LT.second.isScalableVector() ? 1 : 2)
1560+
: 0) *
1561+
getRISCVInstructionCost(RISCV::VMAND_MM, LT.second, CostKind) +
1562+
getRISCVInstructionCost(RISCV::VMNAND_MM, LT.second, CostKind) +
15481563
getRISCVInstructionCost(RISCV::VCPOP_M, LT.second, CostKind) +
15491564
getCmpSelInstrCost(Instruction::ICmp, ElementTy, ElementTy,
15501565
CmpInst::ICMP_EQ, CostKind);

llvm/test/Analysis/CostModel/RISCV/reduce-and.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,17 +6,17 @@
66

77
define i32 @reduce_i1(i32 %arg) {
88
; CHECK-LABEL: 'reduce_i1'
9-
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> undef)
9+
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> undef)
1010
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> undef)
1111
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> undef)
1212
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> undef)
1313
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> undef)
1414
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> undef)
1515
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i1 @llvm.vector.reduce.and.v64i1(<64 x i1> undef)
1616
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i1 @llvm.vector.reduce.and.v128i1(<128 x i1> undef)
17-
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = call i1 @llvm.vector.reduce.and.v256i1(<256 x i1> undef)
18-
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512 = call i1 @llvm.vector.reduce.and.v512i1(<512 x i1> undef)
19-
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V1024 = call i1 @llvm.vector.reduce.and.v1024i1(<1024 x i1> undef)
17+
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = call i1 @llvm.vector.reduce.and.v256i1(<256 x i1> undef)
18+
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V512 = call i1 @llvm.vector.reduce.and.v512i1(<512 x i1> undef)
19+
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V1024 = call i1 @llvm.vector.reduce.and.v1024i1(<1024 x i1> undef)
2020
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1 = call i1 @llvm.vector.reduce.and.nxv1i1(<vscale x 1 x i1> undef)
2121
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2 = call i1 @llvm.vector.reduce.and.nxv2i1(<vscale x 2 x i1> undef)
2222
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV4 = call i1 @llvm.vector.reduce.and.nxv4i1(<vscale x 4 x i1> undef)
@@ -31,17 +31,17 @@ define i32 @reduce_i1(i32 %arg) {
3131
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
3232
;
3333
; SIZE-LABEL: 'reduce_i1'
34-
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> undef)
34+
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> undef)
3535
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> undef)
3636
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> undef)
3737
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> undef)
3838
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> undef)
3939
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> undef)
4040
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i1 @llvm.vector.reduce.and.v64i1(<64 x i1> undef)
4141
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i1 @llvm.vector.reduce.and.v128i1(<128 x i1> undef)
42-
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = call i1 @llvm.vector.reduce.and.v256i1(<256 x i1> undef)
43-
; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512 = call i1 @llvm.vector.reduce.and.v512i1(<512 x i1> undef)
44-
; SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V1024 = call i1 @llvm.vector.reduce.and.v1024i1(<1024 x i1> undef)
42+
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = call i1 @llvm.vector.reduce.and.v256i1(<256 x i1> undef)
43+
; SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V512 = call i1 @llvm.vector.reduce.and.v512i1(<512 x i1> undef)
44+
; SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V1024 = call i1 @llvm.vector.reduce.and.v1024i1(<1024 x i1> undef)
4545
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1 = call i1 @llvm.vector.reduce.and.nxv1i1(<vscale x 1 x i1> undef)
4646
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2 = call i1 @llvm.vector.reduce.and.nxv2i1(<vscale x 2 x i1> undef)
4747
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV4 = call i1 @llvm.vector.reduce.and.nxv4i1(<vscale x 4 x i1> undef)

llvm/test/Analysis/CostModel/RISCV/reduce-max.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -176,7 +176,7 @@ define i32 @reduce_umax_i64(i32 %arg) {
176176

177177
define i32 @reduce_smin_i1(i32 %arg) {
178178
; CHECK-LABEL: 'reduce_smin_i1'
179-
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i1 @llvm.vector.reduce.smax.v1i1(<1 x i1> undef)
179+
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i1 @llvm.vector.reduce.smax.v1i1(<1 x i1> undef)
180180
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.vector.reduce.smax.v2i1(<2 x i1> undef)
181181
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i1 @llvm.vector.reduce.smax.v4i1(<4 x i1> undef)
182182
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i1 @llvm.vector.reduce.smax.v8i1(<8 x i1> undef)
@@ -187,7 +187,7 @@ define i32 @reduce_smin_i1(i32 %arg) {
187187
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
188188
;
189189
; SIZE-LABEL: 'reduce_smin_i1'
190-
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i1 @llvm.vector.reduce.smax.v1i1(<1 x i1> undef)
190+
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i1 @llvm.vector.reduce.smax.v1i1(<1 x i1> undef)
191191
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.vector.reduce.smax.v2i1(<2 x i1> undef)
192192
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i1 @llvm.vector.reduce.smax.v4i1(<4 x i1> undef)
193193
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i1 @llvm.vector.reduce.smax.v8i1(<8 x i1> undef)

llvm/test/Analysis/CostModel/RISCV/reduce-min.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
define i32 @reduce_umin_i1(i32 %arg) {
88
; CHECK-LABEL: 'reduce_umin_i1'
9-
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i1 @llvm.vector.reduce.umin.v1i1(<1 x i1> undef)
9+
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i1 @llvm.vector.reduce.umin.v1i1(<1 x i1> undef)
1010
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.vector.reduce.umin.v2i1(<2 x i1> undef)
1111
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i1 @llvm.vector.reduce.umin.v4i1(<4 x i1> undef)
1212
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i1 @llvm.vector.reduce.umin.v8i1(<8 x i1> undef)
@@ -17,7 +17,7 @@ define i32 @reduce_umin_i1(i32 %arg) {
1717
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
1818
;
1919
; SIZE-LABEL: 'reduce_umin_i1'
20-
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i1 @llvm.vector.reduce.umin.v1i1(<1 x i1> undef)
20+
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i1 @llvm.vector.reduce.umin.v1i1(<1 x i1> undef)
2121
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.vector.reduce.umin.v2i1(<2 x i1> undef)
2222
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i1 @llvm.vector.reduce.umin.v4i1(<4 x i1> undef)
2323
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i1 @llvm.vector.reduce.umin.v8i1(<8 x i1> undef)

0 commit comments

Comments
 (0)