Skip to content

Commit 2a43729

Browse files
committed
[RISCV][CostModel] Correct the cost of some reductions
Reductions include: and/or/max/min
1 parent 4a3f46d commit 2a43729

File tree

9 files changed

+639
-632
lines changed

9 files changed

+639
-632
lines changed

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1470,27 +1470,27 @@ RISCVTTIImpl::getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty,
14701470
llvm_unreachable("Unsupported intrinsic");
14711471
case Intrinsic::smax:
14721472
SplitOp = RISCV::VMAX_VV;
1473-
Opcodes = {RISCV::VMV_S_X, RISCV::VREDMAX_VS, RISCV::VMV_X_S};
1473+
Opcodes = {RISCV::VREDMAX_VS, RISCV::VMV_X_S};
14741474
break;
14751475
case Intrinsic::smin:
14761476
SplitOp = RISCV::VMIN_VV;
1477-
Opcodes = {RISCV::VMV_S_X, RISCV::VREDMIN_VS, RISCV::VMV_X_S};
1477+
Opcodes = {RISCV::VREDMIN_VS, RISCV::VMV_X_S};
14781478
break;
14791479
case Intrinsic::umax:
14801480
SplitOp = RISCV::VMAXU_VV;
1481-
Opcodes = {RISCV::VMV_S_X, RISCV::VREDMAXU_VS, RISCV::VMV_X_S};
1481+
Opcodes = {RISCV::VREDMAXU_VS, RISCV::VMV_X_S};
14821482
break;
14831483
case Intrinsic::umin:
14841484
SplitOp = RISCV::VMINU_VV;
1485-
Opcodes = {RISCV::VMV_S_X, RISCV::VREDMINU_VS, RISCV::VMV_X_S};
1485+
Opcodes = {RISCV::VREDMINU_VS, RISCV::VMV_X_S};
14861486
break;
14871487
case Intrinsic::maxnum:
14881488
SplitOp = RISCV::VFMAX_VV;
1489-
Opcodes = {RISCV::VFMV_S_F, RISCV::VFREDMAX_VS, RISCV::VFMV_F_S};
1489+
Opcodes = {RISCV::VFREDMAX_VS, RISCV::VFMV_F_S};
14901490
break;
14911491
case Intrinsic::minnum:
14921492
SplitOp = RISCV::VFMIN_VV;
1493-
Opcodes = {RISCV::VFMV_S_F, RISCV::VFREDMIN_VS, RISCV::VFMV_F_S};
1493+
Opcodes = {RISCV::VFREDMIN_VS, RISCV::VFMV_F_S};
14941494
break;
14951495
}
14961496
// Add a cost for data larger than LMUL8
@@ -1534,6 +1534,13 @@ RISCVTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
15341534
getRISCVInstructionCost(Opcodes, LT.second, CostKind) +
15351535
getCmpSelInstrCost(Instruction::ICmp, ElementTy, ElementTy,
15361536
CmpInst::ICMP_EQ, CostKind);
1537+
} else if (ISD == ISD::XOR) {
1538+
// Example sequences:
1539+
// vsetvli a0, zero, e8, mf8, ta, ma
1540+
// vcpop.m a0, v0
1541+
// andi a0, a0, 1
1542+
Opcodes = {RISCV::VCPOP_M};
1543+
return LT.first + getRISCVInstructionCost(Opcodes, LT.second, CostKind);
15371544
} else {
15381545
// Example sequences:
15391546
// vsetvli a0, zero, e8, mf8, ta, ma
@@ -1556,15 +1563,15 @@ RISCVTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
15561563
break;
15571564
case ISD::OR:
15581565
SplitOp = RISCV::VOR_VV;
1559-
Opcodes = {RISCV::VMV_S_X, RISCV::VREDOR_VS, RISCV::VMV_X_S};
1566+
Opcodes = {RISCV::VREDOR_VS, RISCV::VMV_X_S};
15601567
break;
15611568
case ISD::XOR:
15621569
SplitOp = RISCV::VXOR_VV;
15631570
Opcodes = {RISCV::VMV_S_X, RISCV::VREDXOR_VS, RISCV::VMV_X_S};
15641571
break;
15651572
case ISD::AND:
15661573
SplitOp = RISCV::VAND_VV;
1567-
Opcodes = {RISCV::VMV_S_X, RISCV::VREDAND_VS, RISCV::VMV_X_S};
1574+
Opcodes = {RISCV::VREDAND_VS, RISCV::VMV_X_S};
15681575
break;
15691576
case ISD::FADD:
15701577
// We can't promote f16/bf16 fadd reductions.

llvm/test/Analysis/CostModel/RISCV/cttz_elts.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9,28 +9,28 @@ define void @foo_no_vscale_range() {
99
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv16i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1(<vscale x 16 x i1> undef, i1 true)
1010
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv32i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv32i1(<vscale x 32 x i1> undef, i1 true)
1111
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv64i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv64i1(<vscale x 64 x i1> undef, i1 true)
12-
; CHECK-NEXT: Cost Model: Found an estimated cost of 662 for instruction: %res.i64.nxv128i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv128i1(<vscale x 128 x i1> undef, i1 true)
12+
; CHECK-NEXT: Cost Model: Found an estimated cost of 661 for instruction: %res.i64.nxv128i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv128i1(<vscale x 128 x i1> undef, i1 true)
1313
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv2i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv2i1(<vscale x 2 x i1> undef, i1 true)
1414
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv4i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv4i1(<vscale x 4 x i1> undef, i1 true)
1515
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv8i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv8i1(<vscale x 8 x i1> undef, i1 true)
1616
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv16i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> undef, i1 true)
1717
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv32i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv32i1(<vscale x 32 x i1> undef, i1 true)
1818
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv64i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv64i1(<vscale x 64 x i1> undef, i1 true)
19-
; CHECK-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %res.i32.nxv128i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv128i1(<vscale x 128 x i1> undef, i1 true)
19+
; CHECK-NEXT: Cost Model: Found an estimated cost of 334 for instruction: %res.i32.nxv128i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv128i1(<vscale x 128 x i1> undef, i1 true)
2020
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res.i64.nxv2i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv2i1(<vscale x 2 x i1> undef, i1 false)
2121
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res.i64.nxv4i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1(<vscale x 4 x i1> undef, i1 false)
2222
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res.i64.nxv8i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv8i1(<vscale x 8 x i1> undef, i1 false)
2323
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res.i64.nxv16i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1(<vscale x 16 x i1> undef, i1 false)
2424
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res.i64.nxv32i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv32i1(<vscale x 32 x i1> undef, i1 false)
2525
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res.i64.nxv64i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv64i1(<vscale x 64 x i1> undef, i1 false)
26-
; CHECK-NEXT: Cost Model: Found an estimated cost of 662 for instruction: %res.i64.nxv128i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv128i1(<vscale x 128 x i1> undef, i1 false)
26+
; CHECK-NEXT: Cost Model: Found an estimated cost of 661 for instruction: %res.i64.nxv128i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv128i1(<vscale x 128 x i1> undef, i1 false)
2727
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res.i32.nxv2i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv2i1(<vscale x 2 x i1> undef, i1 false)
2828
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res.i32.nxv4i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv4i1(<vscale x 4 x i1> undef, i1 false)
2929
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res.i32.nxv8i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv8i1(<vscale x 8 x i1> undef, i1 false)
3030
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res.i32.nxv16i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> undef, i1 false)
3131
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res.i32.nxv32i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv32i1(<vscale x 32 x i1> undef, i1 false)
3232
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res.i32.nxv64i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv64i1(<vscale x 64 x i1> undef, i1 false)
33-
; CHECK-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %res.i32.nxv128i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv128i1(<vscale x 128 x i1> undef, i1 false)
33+
; CHECK-NEXT: Cost Model: Found an estimated cost of 334 for instruction: %res.i32.nxv128i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv128i1(<vscale x 128 x i1> undef, i1 false)
3434
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
3535
;
3636
%res.i64.nxv2i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv2i1(<vscale x 2 x i1> undef, i1 true)
@@ -75,28 +75,28 @@ define void @foo_vscale_range_2_16() vscale_range(2,16) {
7575
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv16i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1(<vscale x 16 x i1> undef, i1 true)
7676
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv32i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv32i1(<vscale x 32 x i1> undef, i1 true)
7777
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv64i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv64i1(<vscale x 64 x i1> undef, i1 true)
78-
; CHECK-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %res.i64.nxv128i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv128i1(<vscale x 128 x i1> undef, i1 true)
78+
; CHECK-NEXT: Cost Model: Found an estimated cost of 171 for instruction: %res.i64.nxv128i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv128i1(<vscale x 128 x i1> undef, i1 true)
7979
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv2i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv2i1(<vscale x 2 x i1> undef, i1 true)
8080
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv4i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv4i1(<vscale x 4 x i1> undef, i1 true)
8181
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv8i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv8i1(<vscale x 8 x i1> undef, i1 true)
8282
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv16i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> undef, i1 true)
8383
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv32i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv32i1(<vscale x 32 x i1> undef, i1 true)
8484
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv64i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv64i1(<vscale x 64 x i1> undef, i1 true)
85-
; CHECK-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %res.i32.nxv128i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv128i1(<vscale x 128 x i1> undef, i1 true)
85+
; CHECK-NEXT: Cost Model: Found an estimated cost of 171 for instruction: %res.i32.nxv128i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv128i1(<vscale x 128 x i1> undef, i1 true)
8686
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res.i64.nxv2i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv2i1(<vscale x 2 x i1> undef, i1 false)
8787
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res.i64.nxv4i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1(<vscale x 4 x i1> undef, i1 false)
8888
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res.i64.nxv8i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv8i1(<vscale x 8 x i1> undef, i1 false)
8989
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res.i64.nxv16i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1(<vscale x 16 x i1> undef, i1 false)
9090
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res.i64.nxv32i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv32i1(<vscale x 32 x i1> undef, i1 false)
9191
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res.i64.nxv64i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv64i1(<vscale x 64 x i1> undef, i1 false)
92-
; CHECK-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %res.i64.nxv128i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv128i1(<vscale x 128 x i1> undef, i1 false)
92+
; CHECK-NEXT: Cost Model: Found an estimated cost of 171 for instruction: %res.i64.nxv128i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv128i1(<vscale x 128 x i1> undef, i1 false)
9393
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res.i32.nxv2i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv2i1(<vscale x 2 x i1> undef, i1 false)
9494
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res.i32.nxv4i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv4i1(<vscale x 4 x i1> undef, i1 false)
9595
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res.i32.nxv8i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv8i1(<vscale x 8 x i1> undef, i1 false)
9696
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res.i32.nxv16i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> undef, i1 false)
9797
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res.i32.nxv32i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv32i1(<vscale x 32 x i1> undef, i1 false)
9898
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res.i32.nxv64i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv64i1(<vscale x 64 x i1> undef, i1 false)
99-
; CHECK-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %res.i32.nxv128i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv128i1(<vscale x 128 x i1> undef, i1 false)
99+
; CHECK-NEXT: Cost Model: Found an estimated cost of 171 for instruction: %res.i32.nxv128i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv128i1(<vscale x 128 x i1> undef, i1 false)
100100
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
101101
;
102102
%res.i64.nxv2i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv2i1(<vscale x 2 x i1> undef, i1 true)

0 commit comments

Comments
 (0)