Skip to content

Commit 46829e5

Browse files
authored
[RISCV][CostModel] Correct the cost of some reductions (llvm#118072)
Reductions include: and/or/max/min
1 parent f68b0e3 commit 46829e5

File tree

9 files changed

+645
-634
lines changed

9 files changed

+645
-634
lines changed

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1476,35 +1476,35 @@ RISCVTTIImpl::getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty,
14761476
return ExtraCost + getRISCVInstructionCost(Opcodes, LT.second, CostKind);
14771477
}
14781478

1479-
// IR Reduction is composed by two vmv and one rvv reduction instruction.
1479+
// IR Reduction is composed by one rvv reduction instruction and vmv
14801480
unsigned SplitOp;
14811481
SmallVector<unsigned, 3> Opcodes;
14821482
switch (IID) {
14831483
default:
14841484
llvm_unreachable("Unsupported intrinsic");
14851485
case Intrinsic::smax:
14861486
SplitOp = RISCV::VMAX_VV;
1487-
Opcodes = {RISCV::VMV_S_X, RISCV::VREDMAX_VS, RISCV::VMV_X_S};
1487+
Opcodes = {RISCV::VREDMAX_VS, RISCV::VMV_X_S};
14881488
break;
14891489
case Intrinsic::smin:
14901490
SplitOp = RISCV::VMIN_VV;
1491-
Opcodes = {RISCV::VMV_S_X, RISCV::VREDMIN_VS, RISCV::VMV_X_S};
1491+
Opcodes = {RISCV::VREDMIN_VS, RISCV::VMV_X_S};
14921492
break;
14931493
case Intrinsic::umax:
14941494
SplitOp = RISCV::VMAXU_VV;
1495-
Opcodes = {RISCV::VMV_S_X, RISCV::VREDMAXU_VS, RISCV::VMV_X_S};
1495+
Opcodes = {RISCV::VREDMAXU_VS, RISCV::VMV_X_S};
14961496
break;
14971497
case Intrinsic::umin:
14981498
SplitOp = RISCV::VMINU_VV;
1499-
Opcodes = {RISCV::VMV_S_X, RISCV::VREDMINU_VS, RISCV::VMV_X_S};
1499+
Opcodes = {RISCV::VREDMINU_VS, RISCV::VMV_X_S};
15001500
break;
15011501
case Intrinsic::maxnum:
15021502
SplitOp = RISCV::VFMAX_VV;
1503-
Opcodes = {RISCV::VFMV_S_F, RISCV::VFREDMAX_VS, RISCV::VFMV_F_S};
1503+
Opcodes = {RISCV::VFREDMAX_VS, RISCV::VFMV_F_S};
15041504
break;
15051505
case Intrinsic::minnum:
15061506
SplitOp = RISCV::VFMIN_VV;
1507-
Opcodes = {RISCV::VFMV_S_F, RISCV::VFREDMIN_VS, RISCV::VFMV_F_S};
1507+
Opcodes = {RISCV::VFREDMIN_VS, RISCV::VFMV_F_S};
15081508
break;
15091509
}
15101510
// Add a cost for data larger than LMUL8
@@ -1548,6 +1548,15 @@ RISCVTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
15481548
getRISCVInstructionCost(RISCV::VCPOP_M, LT.second, CostKind) +
15491549
getCmpSelInstrCost(Instruction::ICmp, ElementTy, ElementTy,
15501550
CmpInst::ICMP_EQ, CostKind);
1551+
} else if (ISD == ISD::XOR) {
1552+
// Example sequences:
1553+
// vsetvli a0, zero, e8, mf8, ta, ma
1554+
// vmxor.mm v8, v0, v8 ; needed every time type is split
1555+
// vcpop.m a0, v8
1556+
// andi a0, a0, 1
1557+
return (LT.first - 1) *
1558+
getRISCVInstructionCost(RISCV::VMXOR_MM, LT.second, CostKind) +
1559+
getRISCVInstructionCost(RISCV::VCPOP_M, LT.second, CostKind) + 1;
15511560
} else {
15521561
// Example sequences:
15531562
// vsetvli a0, zero, e8, mf8, ta, ma
@@ -1562,7 +1571,9 @@ RISCVTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
15621571
}
15631572
}
15641573

1565-
// IR Reduction is composed by two vmv and one rvv reduction instruction.
1574+
// IR Reduction of or/and is composed by one vmv and one rvv reduction
1575+
// instruction, and others is composed by two vmv and one rvv reduction
1576+
// instruction
15661577
unsigned SplitOp;
15671578
SmallVector<unsigned, 3> Opcodes;
15681579
switch (ISD) {
@@ -1572,15 +1583,15 @@ RISCVTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
15721583
break;
15731584
case ISD::OR:
15741585
SplitOp = RISCV::VOR_VV;
1575-
Opcodes = {RISCV::VMV_S_X, RISCV::VREDOR_VS, RISCV::VMV_X_S};
1586+
Opcodes = {RISCV::VREDOR_VS, RISCV::VMV_X_S};
15761587
break;
15771588
case ISD::XOR:
15781589
SplitOp = RISCV::VXOR_VV;
15791590
Opcodes = {RISCV::VMV_S_X, RISCV::VREDXOR_VS, RISCV::VMV_X_S};
15801591
break;
15811592
case ISD::AND:
15821593
SplitOp = RISCV::VAND_VV;
1583-
Opcodes = {RISCV::VMV_S_X, RISCV::VREDAND_VS, RISCV::VMV_X_S};
1594+
Opcodes = {RISCV::VREDAND_VS, RISCV::VMV_X_S};
15841595
break;
15851596
case ISD::FADD:
15861597
// We can't promote f16/bf16 fadd reductions.

llvm/test/Analysis/CostModel/RISCV/cttz_elts.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9,28 +9,28 @@ define void @foo_no_vscale_range() {
99
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv16i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1(<vscale x 16 x i1> undef, i1 true)
1010
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv32i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv32i1(<vscale x 32 x i1> undef, i1 true)
1111
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv64i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv64i1(<vscale x 64 x i1> undef, i1 true)
12-
; CHECK-NEXT: Cost Model: Found an estimated cost of 662 for instruction: %res.i64.nxv128i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv128i1(<vscale x 128 x i1> undef, i1 true)
12+
; CHECK-NEXT: Cost Model: Found an estimated cost of 661 for instruction: %res.i64.nxv128i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv128i1(<vscale x 128 x i1> undef, i1 true)
1313
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv2i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv2i1(<vscale x 2 x i1> undef, i1 true)
1414
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv4i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv4i1(<vscale x 4 x i1> undef, i1 true)
1515
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv8i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv8i1(<vscale x 8 x i1> undef, i1 true)
1616
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv16i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> undef, i1 true)
1717
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv32i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv32i1(<vscale x 32 x i1> undef, i1 true)
1818
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv64i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv64i1(<vscale x 64 x i1> undef, i1 true)
19-
; CHECK-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %res.i32.nxv128i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv128i1(<vscale x 128 x i1> undef, i1 true)
19+
; CHECK-NEXT: Cost Model: Found an estimated cost of 334 for instruction: %res.i32.nxv128i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv128i1(<vscale x 128 x i1> undef, i1 true)
2020
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res.i64.nxv2i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv2i1(<vscale x 2 x i1> undef, i1 false)
2121
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res.i64.nxv4i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1(<vscale x 4 x i1> undef, i1 false)
2222
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res.i64.nxv8i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv8i1(<vscale x 8 x i1> undef, i1 false)
2323
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res.i64.nxv16i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1(<vscale x 16 x i1> undef, i1 false)
2424
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res.i64.nxv32i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv32i1(<vscale x 32 x i1> undef, i1 false)
2525
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res.i64.nxv64i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv64i1(<vscale x 64 x i1> undef, i1 false)
26-
; CHECK-NEXT: Cost Model: Found an estimated cost of 662 for instruction: %res.i64.nxv128i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv128i1(<vscale x 128 x i1> undef, i1 false)
26+
; CHECK-NEXT: Cost Model: Found an estimated cost of 661 for instruction: %res.i64.nxv128i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv128i1(<vscale x 128 x i1> undef, i1 false)
2727
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res.i32.nxv2i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv2i1(<vscale x 2 x i1> undef, i1 false)
2828
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res.i32.nxv4i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv4i1(<vscale x 4 x i1> undef, i1 false)
2929
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res.i32.nxv8i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv8i1(<vscale x 8 x i1> undef, i1 false)
3030
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res.i32.nxv16i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> undef, i1 false)
3131
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res.i32.nxv32i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv32i1(<vscale x 32 x i1> undef, i1 false)
3232
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res.i32.nxv64i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv64i1(<vscale x 64 x i1> undef, i1 false)
33-
; CHECK-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %res.i32.nxv128i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv128i1(<vscale x 128 x i1> undef, i1 false)
33+
; CHECK-NEXT: Cost Model: Found an estimated cost of 334 for instruction: %res.i32.nxv128i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv128i1(<vscale x 128 x i1> undef, i1 false)
3434
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
3535
;
3636
%res.i64.nxv2i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv2i1(<vscale x 2 x i1> undef, i1 true)
@@ -75,28 +75,28 @@ define void @foo_vscale_range_2_16() vscale_range(2,16) {
7575
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv16i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1(<vscale x 16 x i1> undef, i1 true)
7676
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv32i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv32i1(<vscale x 32 x i1> undef, i1 true)
7777
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv64i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv64i1(<vscale x 64 x i1> undef, i1 true)
78-
; CHECK-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %res.i64.nxv128i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv128i1(<vscale x 128 x i1> undef, i1 true)
78+
; CHECK-NEXT: Cost Model: Found an estimated cost of 171 for instruction: %res.i64.nxv128i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv128i1(<vscale x 128 x i1> undef, i1 true)
7979
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv2i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv2i1(<vscale x 2 x i1> undef, i1 true)
8080
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv4i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv4i1(<vscale x 4 x i1> undef, i1 true)
8181
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv8i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv8i1(<vscale x 8 x i1> undef, i1 true)
8282
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv16i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> undef, i1 true)
8383
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv32i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv32i1(<vscale x 32 x i1> undef, i1 true)
8484
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv64i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv64i1(<vscale x 64 x i1> undef, i1 true)
85-
; CHECK-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %res.i32.nxv128i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv128i1(<vscale x 128 x i1> undef, i1 true)
85+
; CHECK-NEXT: Cost Model: Found an estimated cost of 171 for instruction: %res.i32.nxv128i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv128i1(<vscale x 128 x i1> undef, i1 true)
8686
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res.i64.nxv2i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv2i1(<vscale x 2 x i1> undef, i1 false)
8787
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res.i64.nxv4i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1(<vscale x 4 x i1> undef, i1 false)
8888
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res.i64.nxv8i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv8i1(<vscale x 8 x i1> undef, i1 false)
8989
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res.i64.nxv16i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1(<vscale x 16 x i1> undef, i1 false)
9090
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res.i64.nxv32i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv32i1(<vscale x 32 x i1> undef, i1 false)
9191
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res.i64.nxv64i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv64i1(<vscale x 64 x i1> undef, i1 false)
92-
; CHECK-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %res.i64.nxv128i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv128i1(<vscale x 128 x i1> undef, i1 false)
92+
; CHECK-NEXT: Cost Model: Found an estimated cost of 171 for instruction: %res.i64.nxv128i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv128i1(<vscale x 128 x i1> undef, i1 false)
9393
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res.i32.nxv2i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv2i1(<vscale x 2 x i1> undef, i1 false)
9494
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res.i32.nxv4i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv4i1(<vscale x 4 x i1> undef, i1 false)
9595
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res.i32.nxv8i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv8i1(<vscale x 8 x i1> undef, i1 false)
9696
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res.i32.nxv16i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> undef, i1 false)
9797
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res.i32.nxv32i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv32i1(<vscale x 32 x i1> undef, i1 false)
9898
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res.i32.nxv64i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv64i1(<vscale x 64 x i1> undef, i1 false)
99-
; CHECK-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %res.i32.nxv128i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv128i1(<vscale x 128 x i1> undef, i1 false)
99+
; CHECK-NEXT: Cost Model: Found an estimated cost of 171 for instruction: %res.i32.nxv128i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv128i1(<vscale x 128 x i1> undef, i1 false)
100100
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
101101
;
102102
%res.i64.nxv2i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv2i1(<vscale x 2 x i1> undef, i1 true)

0 commit comments

Comments
 (0)