[RISCV][VLOPT] Enable the RISCVVLOptimizer by default #119461

Merged: 2 commits, Dec 17, 2024
2 changes: 1 addition & 1 deletion llvm/lib/Target/RISCV/RISCVTargetMachine.cpp

@@ -105,7 +105,7 @@ static cl::opt<bool> EnablePostMISchedLoadStoreClustering(
static cl::opt<bool>
    EnableVLOptimizer("riscv-enable-vl-optimizer",
                      cl::desc("Enable the RISC-V VL Optimizer pass"),
-                      cl::init(false), cl::Hidden);
+                      cl::init(true), cl::Hidden);

static cl::opt<bool> DisableVectorMaskMutation(
    "riscv-disable-vector-mask-mutation",
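
For readers tracing where this flag takes effect: the gating code is not part of this diff, but based on where "RISC-V VL Optimizer" lands in the O3 pipeline dump below, a minimal sketch of the likely hookup in RISCVPassConfig looks like this (placement and the optimization-level check are assumptions, not quoted source):

    // Sketch only, assuming conventional LLVM pass-config structure:
    void RISCVPassConfig::addPreRegAlloc() {
      // cl::init(true) means the pass now runs by default; the hidden
      // flag remains as an opt-out rather than an opt-in.
      if (TM->getOptLevel() != CodeGenOptLevel::None && EnableVLOptimizer)
        addPass(createRISCVVLOptimizerPass());
      // ... remaining pre-RA passes ...
    }

Since the option is cl::Hidden it does not show up in plain --help output, but llc still accepts -riscv-enable-vl-optimizer=false as an escape hatch if the pass misbehaves.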
3 changes: 2 additions & 1 deletion llvm/test/CodeGen/RISCV/O3-pipeline.ll

@@ -119,6 +119,8 @@
; RV64-NEXT: RISC-V Optimize W Instructions
; CHECK-NEXT: RISC-V Pre-RA pseudo instruction expansion pass
; CHECK-NEXT: RISC-V Merge Base Offset
+; CHECK-NEXT: MachineDominator Tree Construction
+; CHECK-NEXT: RISC-V VL Optimizer
; CHECK-NEXT: RISC-V Insert Read/Write CSR Pass
; CHECK-NEXT: RISC-V Insert Write VXRM Pass
; CHECK-NEXT: RISC-V Landing Pad Setup
@@ -129,7 +131,6 @@
; CHECK-NEXT: Live Variable Analysis
; CHECK-NEXT: Eliminate PHI nodes for register allocation
; CHECK-NEXT: Two-Address instruction pass
-; CHECK-NEXT: MachineDominator Tree Construction
; CHECK-NEXT: Slot index numbering
; CHECK-NEXT: Live Interval Analysis
; CHECK-NEXT: Register Coalescer
6 changes: 2 additions & 4 deletions llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll

@@ -2654,9 +2654,8 @@ define <vscale x 1 x i9> @vp_ctlo_zero_undef_nxv1i9(<vscale x 1 x i9> %va, <vsca
; CHECK-LABEL: vp_ctlo_zero_undef_nxv1i9:
; CHECK: # %bb.0:
; CHECK-NEXT: li a1, 511
-; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vxor.vx v8, v8, a1
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vxor.vx v8, v8, a1
; CHECK-NEXT: vsll.vi v8, v8, 7, v0.t
; CHECK-NEXT: vfwcvt.f.xu.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
@@ -2670,9 +2669,8 @@ define <vscale x 1 x i9> @vp_ctlo_zero_undef_nxv1i9(<vscale x 1 x i9> %va, <vsca
; CHECK-ZVBB-LABEL: vp_ctlo_zero_undef_nxv1i9:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: li a1, 511
-; CHECK-ZVBB-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
-; CHECK-ZVBB-NEXT: vxor.vx v8, v8, a1
; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-ZVBB-NEXT: vxor.vx v8, v8, a1
; CHECK-ZVBB-NEXT: vsll.vi v8, v8, 7, v0.t
; CHECK-ZVBB-NEXT: vclz.v v8, v8, v0.t
; CHECK-ZVBB-NEXT: ret
2 changes: 0 additions & 2 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs.ll

@@ -39,9 +39,7 @@ define void @abs_v6i16(ptr %x) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vrsub.vi v9, v8, 0
-; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT: vmax.vv v8, v8, v9
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
20 changes: 4 additions & 16 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll

@@ -788,11 +788,9 @@ define void @copysign_v6bf16(ptr %x, ptr %y) {
; CHECK-NEXT: vle16.v v8, (a1)
; CHECK-NEXT: vle16.v v9, (a0)
; CHECK-NEXT: lui a1, 8
-; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vand.vx v8, v8, a1
; CHECK-NEXT: addi a1, a1, -1
; CHECK-NEXT: vand.vx v9, v9, a1
-; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT: vor.vv v8, v9, v8
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
@@ -848,11 +846,9 @@ define void @copysign_v6f16(ptr %x, ptr %y) {
; ZVFHMIN-NEXT: vle16.v v8, (a1)
; ZVFHMIN-NEXT: vle16.v v9, (a0)
; ZVFHMIN-NEXT: lui a1, 8
-; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT: vand.vx v8, v8, a1
; ZVFHMIN-NEXT: addi a1, a1, -1
; ZVFHMIN-NEXT: vand.vx v9, v9, a1
-; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFHMIN-NEXT: vor.vv v8, v9, v8
; ZVFHMIN-NEXT: vse16.v v8, (a0)
; ZVFHMIN-NEXT: ret
@@ -924,12 +920,10 @@ define void @copysign_vf_v6bf16(ptr %x, bfloat %y) {
; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: lui a2, 8
-; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vmv.v.x v9, a1
; CHECK-NEXT: addi a1, a2, -1
; CHECK-NEXT: vand.vx v8, v8, a1
; CHECK-NEXT: vand.vx v9, v9, a2
-; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT: vor.vv v8, v8, v9
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
@@ -986,12 +980,10 @@ define void @copysign_vf_v6f16(ptr %x, half %y) {
; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFHMIN-NEXT: vle16.v v8, (a0)
; ZVFHMIN-NEXT: lui a2, 8
-; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT: vmv.v.x v9, a1
; ZVFHMIN-NEXT: addi a1, a2, -1
; ZVFHMIN-NEXT: vand.vx v8, v8, a1
; ZVFHMIN-NEXT: vand.vx v9, v9, a2
-; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: vse16.v v8, (a0)
; ZVFHMIN-NEXT: ret
@@ -1065,11 +1057,9 @@ define void @copysign_neg_v6bf16(ptr %x, ptr %y) {
; CHECK-NEXT: vle16.v v9, (a0)
; CHECK-NEXT: lui a1, 8
; CHECK-NEXT: addi a2, a1, -1
-; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vxor.vx v8, v8, a1
; CHECK-NEXT: vand.vx v9, v9, a2
; CHECK-NEXT: vand.vx v8, v8, a1
-; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT: vor.vv v8, v9, v8
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
@@ -1129,11 +1119,9 @@ define void @copysign_neg_v6f16(ptr %x, ptr %y) {
; ZVFHMIN-NEXT: vle16.v v9, (a0)
; ZVFHMIN-NEXT: lui a1, 8
; ZVFHMIN-NEXT: addi a2, a1, -1
-; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT: vxor.vx v8, v8, a1
; ZVFHMIN-NEXT: vand.vx v9, v9, a2
; ZVFHMIN-NEXT: vand.vx v8, v8, a1
-; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFHMIN-NEXT: vor.vv v8, v9, v8
; ZVFHMIN-NEXT: vse16.v v8, (a0)
; ZVFHMIN-NEXT: ret
@@ -1211,12 +1199,12 @@ define void @copysign_neg_trunc_v3bf16_v3f32(ptr %x, ptr %y) {
; CHECK-NEXT: vle32.v v9, (a1)
; CHECK-NEXT: lui a1, 8
; CHECK-NEXT: addi a2, a1, -1
-; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vand.vx v8, v8, a2
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v10, v9
-; CHECK-NEXT: vsetivli zero, 3, e16, mf2, ta, ma
; CHECK-NEXT: vxor.vx v9, v10, a1
; CHECK-NEXT: vand.vx v9, v9, a1
+; CHECK-NEXT: vsetivli zero, 3, e16, mf2, ta, ma
; CHECK-NEXT: vor.vv v8, v8, v9
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
@@ -1283,12 +1271,12 @@ define void @copysign_neg_trunc_v3f16_v3f32(ptr %x, ptr %y) {
; ZVFHMIN-NEXT: vle32.v v9, (a1)
; ZVFHMIN-NEXT: lui a1, 8
; ZVFHMIN-NEXT: addi a2, a1, -1
-; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vand.vx v8, v8, a2
+; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
-; ZVFHMIN-NEXT: vsetivli zero, 3, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vxor.vx v9, v10, a1
; ZVFHMIN-NEXT: vand.vx v9, v9, a1
+; ZVFHMIN-NEXT: vsetivli zero, 3, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: vse16.v v8, (a0)
; ZVFHMIN-NEXT: ret
3 changes: 2 additions & 1 deletion llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll

@@ -910,8 +910,9 @@ define <4 x i8> @buildvec_not_vid_v4i8_2() {
define <16 x i8> @buildvec_not_vid_v16i8() {
; CHECK-LABEL: buildvec_not_vid_v16i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 7, e8, m1, ta, ma
; CHECK-NEXT: vmv.v.i v9, 3
+; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vsetivli zero, 7, e8, m1, tu, ma
; CHECK-NEXT: vslideup.vi v8, v9, 6
6 changes: 4 additions & 2 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll

@@ -348,8 +348,9 @@ define <8 x i8> @splat_ve4_ins_i0ve2(<8 x i8> %v) {
define <8 x i8> @splat_ve4_ins_i1ve3(<8 x i8> %v) {
; CHECK-LABEL: splat_ve4_ins_i1ve3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vsetivli zero, 2, e8, mf2, ta, ma
; CHECK-NEXT: vmv.v.i v9, 3
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma

Inline comment (Collaborator):
Non blocking, but this shows a case where we probably want to teach VSETVLI insertion that it can increase VL if the instruction is tail undefined.
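
A toy model of that suggestion (all names below are hypothetical, not the actual RISCVInsertVSETVLI API): raising a producer's VL only redefines its tail elements, so the transform is sound exactly when that tail is undefined and the vtype already matches, which would let the two vsetivlis here fold into one.

    // Hypothetical sketch of when VSETVLI insertion could raise VL.
    struct VLState {
      unsigned VL;    // requested vector length
      unsigned VType; // encoded SEW/LMUL/tail/mask policy
    };

    // Raising Def.VL to Use.VL rewrites only the tail of the producer's
    // result, which is legal if that tail is undefined anyway.
    bool canRaiseVL(const VLState &Def, const VLState &Use,
                    bool TailUndefined) {
      return TailUndefined && Def.VType == Use.VType && Def.VL <= Use.VL;
    }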

; CHECK-NEXT: vmv.v.i v10, 4
; CHECK-NEXT: vsetivli zero, 2, e8, mf2, tu, ma
; CHECK-NEXT: vslideup.vi v10, v9, 1
@@ -432,8 +433,9 @@ define <8 x i8> @splat_ve2_we0_ins_i2ve4(<8 x i8> %v, <8 x i8> %w) {
define <8 x i8> @splat_ve2_we0_ins_i2we4(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: splat_ve2_we0_ins_i2we4:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vsetivli zero, 3, e8, mf2, ta, ma
; CHECK-NEXT: vmv.v.i v10, 4
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vmv.v.i v11, 0
; CHECK-NEXT: li a0, 70
; CHECK-NEXT: vsetivli zero, 3, e8, mf2, tu, ma
4 changes: 3 additions & 1 deletion llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll

@@ -1100,15 +1100,17 @@ define void @mulhu_v8i16(ptr %x) {
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: vmv.v.i v9, 0
+; CHECK-NEXT: vsetivli zero, 7, e16, m1, ta, ma
; CHECK-NEXT: vmv.v.i v10, 1
; CHECK-NEXT: li a1, 33
; CHECK-NEXT: vmv.s.x v0, a1
; CHECK-NEXT: lui a1, %hi(.LCPI66_0)
; CHECK-NEXT: addi a1, a1, %lo(.LCPI66_0)
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vmv.v.i v11, 3
; CHECK-NEXT: vle16.v v12, (a1)
; CHECK-NEXT: vmerge.vim v11, v11, 2, v0
-; CHECK-NEXT: vmv.v.i v13, 0
+; CHECK-NEXT: vmv1r.v v13, v9
; CHECK-NEXT: vsetivli zero, 7, e16, m1, tu, ma
; CHECK-NEXT: vslideup.vi v9, v10, 6
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
@@ -97,8 +97,9 @@ define <4 x i32> @v4i32_v8i32(<8 x i32>) {
define <4 x i32> @v4i32_v16i32(<16 x i32>) {
; RV32-LABEL: v4i32_v16i32:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV32-NEXT: vsetivli zero, 2, e16, m1, ta, ma
; RV32-NEXT: vmv.v.i v12, 1
+; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT: vmv.v.i v14, 6

Inline comment (lukel97, Contributor, Dec 18, 2024):

Do we know why only one of the vmv.v.is had its VL reduced here?

Edit: just seeing Philip's comment above that explains it.

Inline comment (michaelmaitland, Contributor, Author, Dec 18, 2024):

Take a look at the MIR: https://godbolt.org/z/xrvG13qx6

You can see that %4 is used as a tied operand. We don't optimize that case:

// Tied operands might pass through.
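
As a rough sketch of the bail-out that quoted comment guards (simplified; the real check lives in RISCVVLOptimizer.cpp and is more involved): a tied passthru source contributes its tail elements to the result, so shrinking a consumer's VL cannot prove the producer's extra elements are dead.

    #include "llvm/CodeGen/MachineInstr.h"

    // Simplified sketch: refuse to shrink VL for an instruction that
    // reads a tied (passthru) register, because elements past the new
    // VL can still flow through the tied operand into the result.
    static bool mayReduceVL(const llvm::MachineInstr &MI) {
      for (const llvm::MachineOperand &MO : MI.uses())
        if (MO.isReg() && MO.isTied())
          return false; // Tied operands might pass through.
      return true;
    }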

; RV32-NEXT: li a0, 32
; RV32-NEXT: vmv.v.i v0, 10