Skip to content

Commit 20637e7

Browse files
authored
[RISCV] Sink splatted fpext operands (#125800)
We sink splatted operands in CodeGenPrepare to help match .vx/.vf patterns. This extends it to also sink any fpext feeding the splat so that we can match widening vfwadd.vf/vfwadd.wf patterns too. Some instructions don't have .wf forms, so there's no benefit to sinking the fpext for them. For simplicity this sinks them anyway and lets early-machinelicm hoist them back out.
1 parent 51b0517 commit 20637e7

File tree

2 files changed

+155
-1
lines changed

2 files changed

+155
-1
lines changed

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2758,7 +2758,12 @@ bool RISCVTTIImpl::isProfitableToSinkOperands(
27582758
return false;
27592759
}
27602760

2761-
Ops.push_back(&Op->getOperandUse(0));
2761+
Use *InsertEltUse = &Op->getOperandUse(0);
2762+
// Sink any fpexts since they might be used in a widening fp pattern.
2763+
auto *InsertElt = cast<InsertElementInst>(InsertEltUse);
2764+
if (isa<FPExtInst>(InsertElt->getOperand(1)))
2765+
Ops.push_back(&InsertElt->getOperandUse(1));
2766+
Ops.push_back(InsertEltUse);
27622767
Ops.push_back(&OpIdx.value());
27632768
}
27642769
return true;

llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll

Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5741,3 +5741,152 @@ vector.body:
57415741
for.cond.cleanup:
57425742
ret void
57435743
}
5744+
5745+
define void @sink_splat_vfwadd_vf(ptr nocapture %a, ptr nocapture %b, float %f) {
5746+
; CHECK-LABEL: sink_splat_vfwadd_vf:
5747+
; CHECK: # %bb.0: # %entry
5748+
; CHECK-NEXT: li a1, 0
5749+
; CHECK-NEXT: li a2, 1020
5750+
; CHECK-NEXT: vsetvli a3, zero, e32, m1, ta, ma
5751+
; CHECK-NEXT: .LBB125_1: # %vector.body
5752+
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
5753+
; CHECK-NEXT: vl1re32.v v8, (a0)
5754+
; CHECK-NEXT: addi a1, a1, 4
5755+
; CHECK-NEXT: addi a2, a2, -4
5756+
; CHECK-NEXT: vfwadd.vf v10, v8, fa0
5757+
; CHECK-NEXT: vs2r.v v10, (a0)
5758+
; CHECK-NEXT: addi a0, a0, 16
5759+
; CHECK-NEXT: j .LBB125_1
5760+
entry:
5761+
%f.ext = fpext float %f to double
5762+
%broadcast.splatinsert = insertelement <vscale x 2 x double> poison, double %f.ext, i32 0
5763+
%broadcast.splat = shufflevector <vscale x 2 x double> %broadcast.splatinsert, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
5764+
br label %vector.body
5765+
5766+
vector.body: ; preds = %vector.body, %entry
5767+
%index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
5768+
%0 = getelementptr float, ptr %a, i64 %index
5769+
%wide.load = load <vscale x 2 x float>, ptr %0
5770+
%ext = fpext <vscale x 2 x float> %wide.load to <vscale x 2 x double>
5771+
%1 = fadd <vscale x 2 x double> %ext, %broadcast.splat
5772+
%2 = getelementptr double, ptr %b, i64 %index
5773+
store <vscale x 2 x double> %1, ptr %0
5774+
%index.next = add i64 %index, 4
5775+
%3 = icmp eq i64 %index.next, 1024
5776+
br i1 32, label %for.cond.cleanup, label %vector.body
5777+
5778+
for.cond.cleanup: ; preds = %vector.body
5779+
ret void
5780+
}
5781+
5782+
define void @sink_splat_vfwadd_wf(ptr nocapture %a, ptr nocapture %b, float %f) {
5783+
; CHECK-LABEL: sink_splat_vfwadd_wf:
5784+
; CHECK: # %bb.0: # %entry
5785+
; CHECK-NEXT: li a1, 0
5786+
; CHECK-NEXT: li a2, 1020
5787+
; CHECK-NEXT: vsetvli a3, zero, e32, m1, ta, ma
5788+
; CHECK-NEXT: .LBB126_1: # %vector.body
5789+
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
5790+
; CHECK-NEXT: vl2re64.v v8, (a0)
5791+
; CHECK-NEXT: addi a1, a1, 4
5792+
; CHECK-NEXT: addi a2, a2, -4
5793+
; CHECK-NEXT: vfwadd.wf v8, v8, fa0
5794+
; CHECK-NEXT: vs2r.v v8, (a0)
5795+
; CHECK-NEXT: addi a0, a0, 32
5796+
; CHECK-NEXT: j .LBB126_1
5797+
entry:
5798+
%f.ext = fpext float %f to double
5799+
%broadcast.splatinsert = insertelement <vscale x 2 x double> poison, double %f.ext, i32 0
5800+
%broadcast.splat = shufflevector <vscale x 2 x double> %broadcast.splatinsert, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
5801+
br label %vector.body
5802+
5803+
vector.body: ; preds = %vector.body, %entry
5804+
%index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
5805+
%0 = getelementptr double, ptr %a, i64 %index
5806+
%wide.load = load <vscale x 2 x double>, ptr %0
5807+
%1 = fadd <vscale x 2 x double> %wide.load, %broadcast.splat
5808+
%2 = getelementptr double, ptr %b, i64 %index
5809+
store <vscale x 2 x double> %1, ptr %0
5810+
%index.next = add i64 %index, 4
5811+
%3 = icmp eq i64 %index.next, 1024
5812+
br i1 32, label %for.cond.cleanup, label %vector.body
5813+
5814+
for.cond.cleanup: ; preds = %vector.body
5815+
ret void
5816+
}
5817+
5818+
define void @sink_splat_vfwmul_vf(ptr nocapture %a, ptr nocapture %b, float %f) {
5819+
; CHECK-LABEL: sink_splat_vfwmul_vf:
5820+
; CHECK: # %bb.0: # %entry
5821+
; CHECK-NEXT: li a1, 0
5822+
; CHECK-NEXT: li a2, 1020
5823+
; CHECK-NEXT: vsetvli a3, zero, e32, m1, ta, ma
5824+
; CHECK-NEXT: .LBB127_1: # %vector.body
5825+
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
5826+
; CHECK-NEXT: vl1re32.v v8, (a0)
5827+
; CHECK-NEXT: addi a1, a1, 4
5828+
; CHECK-NEXT: addi a2, a2, -4
5829+
; CHECK-NEXT: vfwmul.vf v10, v8, fa0
5830+
; CHECK-NEXT: vs2r.v v10, (a0)
5831+
; CHECK-NEXT: addi a0, a0, 16
5832+
; CHECK-NEXT: j .LBB127_1
5833+
entry:
5834+
%f.ext = fpext float %f to double
5835+
%broadcast.splatinsert = insertelement <vscale x 2 x double> poison, double %f.ext, i32 0
5836+
%broadcast.splat = shufflevector <vscale x 2 x double> %broadcast.splatinsert, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
5837+
br label %vector.body
5838+
5839+
vector.body: ; preds = %vector.body, %entry
5840+
%index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
5841+
%0 = getelementptr float, ptr %a, i64 %index
5842+
%wide.load = load <vscale x 2 x float>, ptr %0
5843+
%ext = fpext <vscale x 2 x float> %wide.load to <vscale x 2 x double>
5844+
%1 = fmul <vscale x 2 x double> %ext, %broadcast.splat
5845+
%2 = getelementptr double, ptr %b, i64 %index
5846+
store <vscale x 2 x double> %1, ptr %0
5847+
%index.next = add i64 %index, 4
5848+
%3 = icmp eq i64 %index.next, 1024
5849+
br i1 32, label %for.cond.cleanup, label %vector.body
5850+
5851+
for.cond.cleanup: ; preds = %vector.body
5852+
ret void
5853+
}
5854+
5855+
; Even though there's no vfwmul.wf we'll sink the fcvt.d.s. Make sure
5856+
; early-machinelicm undoes the sink after isel.
5857+
define void @sink_splat_vfwmul_wf(ptr nocapture %a, ptr nocapture %b, float %f) {
5858+
; CHECK-LABEL: sink_splat_vfwmul_wf:
5859+
; CHECK: # %bb.0: # %entry
5860+
; CHECK-NEXT: li a1, 0
5861+
; CHECK-NEXT: li a2, 1020
5862+
; CHECK-NEXT: fcvt.d.s fa5, fa0
5863+
; CHECK-NEXT: vsetvli a3, zero, e64, m2, ta, ma
5864+
; CHECK-NEXT: .LBB128_1: # %vector.body
5865+
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
5866+
; CHECK-NEXT: vl2re64.v v8, (a0)
5867+
; CHECK-NEXT: addi a1, a1, 4
5868+
; CHECK-NEXT: addi a2, a2, -4
5869+
; CHECK-NEXT: vfmul.vf v8, v8, fa5
5870+
; CHECK-NEXT: vs2r.v v8, (a0)
5871+
; CHECK-NEXT: addi a0, a0, 16
5872+
; CHECK-NEXT: j .LBB128_1
5873+
entry:
5874+
%f.ext = fpext float %f to double
5875+
%broadcast.splatinsert = insertelement <vscale x 2 x double> poison, double %f.ext, i32 0
5876+
%broadcast.splat = shufflevector <vscale x 2 x double> %broadcast.splatinsert, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
5877+
br label %vector.body
5878+
5879+
vector.body: ; preds = %vector.body, %entry
5880+
%index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
5881+
%0 = getelementptr float, ptr %a, i64 %index
5882+
%wide.load = load <vscale x 2 x double>, ptr %0
5883+
%1 = fmul <vscale x 2 x double> %wide.load, %broadcast.splat
5884+
%2 = getelementptr double, ptr %b, i64 %index
5885+
store <vscale x 2 x double> %1, ptr %0
5886+
%index.next = add i64 %index, 4
5887+
%3 = icmp eq i64 %index.next, 1024
5888+
br i1 32, label %for.cond.cleanup, label %vector.body
5889+
5890+
for.cond.cleanup: ; preds = %vector.body
5891+
ret void
5892+
}

0 commit comments

Comments
 (0)