Skip to content

Commit e93181b

Browse files
[RISCV][VLOPT] Add vector fp-conversion instruction to isSupportedInstr (#122033)
When these instructions are marked nofpexcept, we can optimize them. There are some added toggles in the output, likely because other nofpexcept FP instructions are not part of isSupportedInstr yet. We may want to avoid marking an instruction as supported in isSupportedInstr in the future if any of its FP users are missing nofpexcept, to avoid added toggles. However, we seem to get some GPRs back as a result of this change, which may outweigh the cost of avoiding extra toggles. The plan is to follow this patch up with added support for more FP instructions in the same way. The instructions in this patch are a natural starting point because they allow us to test with integer instructions, which have good support already.
1 parent d6ae3d3 commit e93181b

17 files changed

+1155
-1122
lines changed

llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -940,6 +940,30 @@ static bool isSupportedInstr(const MachineInstr &MI) {
940940
case RISCV::VMSOF_M:
941941
case RISCV::VIOTA_M:
942942
case RISCV::VID_V:
943+
// Single-Width Floating-Point/Integer Type-Convert Instructions
944+
case RISCV::VFCVT_XU_F_V:
945+
case RISCV::VFCVT_X_F_V:
946+
case RISCV::VFCVT_RTZ_XU_F_V:
947+
case RISCV::VFCVT_RTZ_X_F_V:
948+
case RISCV::VFCVT_F_XU_V:
949+
case RISCV::VFCVT_F_X_V:
950+
// Widening Floating-Point/Integer Type-Convert Instructions
951+
case RISCV::VFWCVT_XU_F_V:
952+
case RISCV::VFWCVT_X_F_V:
953+
case RISCV::VFWCVT_RTZ_XU_F_V:
954+
case RISCV::VFWCVT_RTZ_X_F_V:
955+
case RISCV::VFWCVT_F_XU_V:
956+
case RISCV::VFWCVT_F_X_V:
957+
case RISCV::VFWCVT_F_F_V:
958+
// Narrowing Floating-Point/Integer Type-Convert Instructions
959+
case RISCV::VFNCVT_XU_F_W:
960+
case RISCV::VFNCVT_X_F_W:
961+
case RISCV::VFNCVT_RTZ_XU_F_W:
962+
case RISCV::VFNCVT_RTZ_X_F_W:
963+
case RISCV::VFNCVT_F_XU_W:
964+
case RISCV::VFNCVT_F_X_W:
965+
case RISCV::VFNCVT_F_F_W:
966+
case RISCV::VFNCVT_ROD_F_F_W:
943967
return true;
944968
}
945969

@@ -1046,6 +1070,11 @@ bool RISCVVLOptimizer::isCandidate(const MachineInstr &MI) const {
10461070
return false;
10471071
}
10481072

1073+
if (MI.mayRaiseFPException()) {
1074+
LLVM_DEBUG(dbgs() << "Not a candidate because may raise FP exception\n");
1075+
return false;
1076+
}
1077+
10491078
// Some instructions that produce vectors have semantics that make it more
10501079
// difficult to determine whether the VL can be reduced. For example, some
10511080
// instructions, such as reductions, may write lanes past VL to a scalar

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1254,12 +1254,10 @@ define void @copysign_neg_trunc_v4f16_v4f32(ptr %x, ptr %y) {
12541254
define void @copysign_neg_trunc_v3f16_v3f32(ptr %x, ptr %y) {
12551255
; ZVFH-LABEL: copysign_neg_trunc_v3f16_v3f32:
12561256
; ZVFH: # %bb.0:
1257-
; ZVFH-NEXT: vsetivli zero, 3, e32, m1, ta, ma
1257+
; ZVFH-NEXT: vsetivli zero, 3, e16, mf2, ta, ma
12581258
; ZVFH-NEXT: vle32.v v8, (a1)
12591259
; ZVFH-NEXT: vle16.v v9, (a0)
1260-
; ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
12611260
; ZVFH-NEXT: vfncvt.f.f.w v10, v8
1262-
; ZVFH-NEXT: vsetivli zero, 3, e16, mf2, ta, ma
12631261
; ZVFH-NEXT: vfsgnjn.vv v8, v9, v10
12641262
; ZVFH-NEXT: vse16.v v8, (a0)
12651263
; ZVFH-NEXT: ret
@@ -1272,9 +1270,7 @@ define void @copysign_neg_trunc_v3f16_v3f32(ptr %x, ptr %y) {
12721270
; ZVFHMIN-NEXT: lui a1, 8
12731271
; ZVFHMIN-NEXT: addi a2, a1, -1
12741272
; ZVFHMIN-NEXT: vand.vx v8, v8, a2
1275-
; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
12761273
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
1277-
; ZVFHMIN-NEXT: vsetivli zero, 3, e16, mf2, ta, ma
12781274
; ZVFHMIN-NEXT: vxor.vx v9, v10, a1
12791275
; ZVFHMIN-NEXT: vand.vx v9, v9, a1
12801276
; ZVFHMIN-NEXT: vor.vv v8, v8, v9
@@ -4013,9 +4009,10 @@ define void @trunc_v6f16(ptr %x) {
40134009
; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
40144010
; ZVFH-NEXT: vfabs.v v9, v8
40154011
; ZVFH-NEXT: vmflt.vf v0, v9, fa5
4012+
; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
40164013
; ZVFH-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
40174014
; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
4018-
; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, mu
4015+
; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
40194016
; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
40204017
; ZVFH-NEXT: vse16.v v8, (a0)
40214018
; ZVFH-NEXT: ret
@@ -4197,10 +4194,11 @@ define void @ceil_v6f16(ptr %x) {
41974194
; ZVFH-NEXT: vfabs.v v9, v8
41984195
; ZVFH-NEXT: vmflt.vf v0, v9, fa5
41994196
; ZVFH-NEXT: fsrmi a1, 3
4197+
; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
42004198
; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
42014199
; ZVFH-NEXT: fsrm a1
42024200
; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
4203-
; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, mu
4201+
; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
42044202
; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
42054203
; ZVFH-NEXT: vse16.v v8, (a0)
42064204
; ZVFH-NEXT: ret
@@ -4388,10 +4386,11 @@ define void @floor_v6f16(ptr %x) {
43884386
; ZVFH-NEXT: vfabs.v v9, v8
43894387
; ZVFH-NEXT: vmflt.vf v0, v9, fa5
43904388
; ZVFH-NEXT: fsrmi a1, 2
4389+
; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
43914390
; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
43924391
; ZVFH-NEXT: fsrm a1
43934392
; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
4394-
; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, mu
4393+
; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
43954394
; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
43964395
; ZVFH-NEXT: vse16.v v8, (a0)
43974396
; ZVFH-NEXT: ret
@@ -4579,10 +4578,11 @@ define void @round_v6f16(ptr %x) {
45794578
; ZVFH-NEXT: vfabs.v v9, v8
45804579
; ZVFH-NEXT: vmflt.vf v0, v9, fa5
45814580
; ZVFH-NEXT: fsrmi a1, 4
4581+
; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
45824582
; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
45834583
; ZVFH-NEXT: fsrm a1
45844584
; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
4585-
; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, mu
4585+
; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
45864586
; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
45874587
; ZVFH-NEXT: vse16.v v8, (a0)
45884588
; ZVFH-NEXT: ret

0 commit comments

Comments
 (0)