Skip to content

Commit 51b0517

Browse files
authored
[RISCV] Don't check extop VL in vfwred{u,o}sum patterns (#125799)
Because riscv_fpextend_vl doesn't have a passthru operand, its tail elements are undef, so we can treat them as if they were active. Relaxing the VL check on the extend allows us to match widening reductions where the fpextend isn't a VP intrinsic. The same reasoning is already used for riscv_fpextend_vl in RISCVInstrInfoVSDPatterns.td.
1 parent 3ac1cb6 commit 51b0517

File tree

2 files changed

+22
-26
lines changed

2 files changed

+22
-26
lines changed

llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1420,7 +1420,7 @@ multiclass VPatWidenReductionVL_Ext_VL<SDNode vop, PatFrags extop, string instru
14201420
let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
14211421
GetVTypePredicates<wti>.Predicates) in {
14221422
def: Pat<(wti_m1.Vector (vop (wti_m1.Vector VR:$passthru),
1423-
(wti.Vector (extop (vti.Vector vti.RegClass:$rs1), (vti.Mask true_mask), VLOpFrag)),
1423+
(wti.Vector (extop (vti.Vector vti.RegClass:$rs1), (vti.Mask true_mask), (XLenVT srcvalue))),
14241424
VR:$rs2, (vti.Mask V0), VLOpFrag,
14251425
(XLenVT timm:$policy))),
14261426
(!cast<Instruction>(instruction_name#"_VS_"#vti.LMul.MX#"_E"#vti.SEW#"_MASK")
@@ -1439,7 +1439,7 @@ multiclass VPatWidenReductionVL_Ext_VL_RM<SDNode vop, PatFrags extop, string ins
14391439
let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
14401440
GetVTypePredicates<wti>.Predicates) in {
14411441
def: Pat<(wti_m1.Vector (vop (wti_m1.Vector VR:$passthru),
1442-
(wti.Vector (extop (vti.Vector vti.RegClass:$rs1), (vti.Mask true_mask), VLOpFrag)),
1442+
(wti.Vector (extop (vti.Vector vti.RegClass:$rs1), (vti.Mask true_mask), (XLenVT srcvalue))),
14431443
VR:$rs2, (vti.Mask V0), VLOpFrag,
14441444
(XLenVT timm:$policy))),
14451445
(!cast<Instruction>(instruction_name#"_VS_"#vti.LMul.MX#"_E"#vti.SEW#"_MASK")

llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll

Lines changed: 20 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -533,13 +533,12 @@ define double @vpreduce_ord_fadd_fpext_vp_fpext_nxv1f32_nxv1f64(double %s, <vsca
533533
define float @vpreduce_fadd_fpext_nxv1f16_nxv1f32(float %s, <vscale x 1 x half> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
534534
; CHECK-LABEL: vpreduce_fadd_fpext_nxv1f16_nxv1f32:
535535
; CHECK: # %bb.0:
536+
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
537+
; CHECK-NEXT: vfmv.s.f v9, fa0
536538
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
537-
; CHECK-NEXT: vfwcvt.f.f.v v9, v8
538-
; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
539-
; CHECK-NEXT: vfmv.s.f v8, fa0
540-
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
541-
; CHECK-NEXT: vfredusum.vs v8, v9, v8, v0.t
542-
; CHECK-NEXT: vfmv.f.s fa0, v8
539+
; CHECK-NEXT: vfwredusum.vs v9, v8, v9, v0.t
540+
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
541+
; CHECK-NEXT: vfmv.f.s fa0, v9
543542
; CHECK-NEXT: ret
544543
%w = fpext <vscale x 1 x half> %v to <vscale x 1 x float>
545544
%r = call reassoc float @llvm.vp.reduce.fadd(float %s, <vscale x 1 x float> %w, <vscale x 1 x i1> %m, i32 %evl)
@@ -549,13 +548,12 @@ define float @vpreduce_fadd_fpext_nxv1f16_nxv1f32(float %s, <vscale x 1 x half>
549548
define float @vpreduce_ord_fadd_fpext_nxv1f16_nxv1f32(float %s, <vscale x 1 x half> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
550549
; CHECK-LABEL: vpreduce_ord_fadd_fpext_nxv1f16_nxv1f32:
551550
; CHECK: # %bb.0:
551+
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
552+
; CHECK-NEXT: vfmv.s.f v9, fa0
552553
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
553-
; CHECK-NEXT: vfwcvt.f.f.v v9, v8
554-
; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
555-
; CHECK-NEXT: vfmv.s.f v8, fa0
556-
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
557-
; CHECK-NEXT: vfredosum.vs v8, v9, v8, v0.t
558-
; CHECK-NEXT: vfmv.f.s fa0, v8
554+
; CHECK-NEXT: vfwredosum.vs v9, v8, v9, v0.t
555+
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
556+
; CHECK-NEXT: vfmv.f.s fa0, v9
559557
; CHECK-NEXT: ret
560558
%w = fpext <vscale x 1 x half> %v to <vscale x 1 x float>
561559
%r = call float @llvm.vp.reduce.fadd(float %s, <vscale x 1 x float> %w, <vscale x 1 x i1> %m, i32 %evl)
@@ -565,13 +563,12 @@ define float @vpreduce_ord_fadd_fpext_nxv1f16_nxv1f32(float %s, <vscale x 1 x ha
565563
define double @vpreduce_fadd_fpext_nxv1f32_nxv1f64(double %s, <vscale x 1 x float> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
566564
; CHECK-LABEL: vpreduce_fadd_fpext_nxv1f32_nxv1f64:
567565
; CHECK: # %bb.0:
568-
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
569-
; CHECK-NEXT: vfwcvt.f.f.v v9, v8
570566
; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
571-
; CHECK-NEXT: vfmv.s.f v8, fa0
572-
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
573-
; CHECK-NEXT: vfredusum.vs v8, v9, v8, v0.t
574-
; CHECK-NEXT: vfmv.f.s fa0, v8
567+
; CHECK-NEXT: vfmv.s.f v9, fa0
568+
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
569+
; CHECK-NEXT: vfwredusum.vs v9, v8, v9, v0.t
570+
; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma
571+
; CHECK-NEXT: vfmv.f.s fa0, v9
575572
; CHECK-NEXT: ret
576573
%w = fpext <vscale x 1 x float> %v to <vscale x 1 x double>
577574
%r = call reassoc double @llvm.vp.reduce.fadd(double %s, <vscale x 1 x double> %w, <vscale x 1 x i1> %m, i32 %evl)
@@ -581,13 +578,12 @@ define double @vpreduce_fadd_fpext_nxv1f32_nxv1f64(double %s, <vscale x 1 x floa
581578
define double @vpreduce_ord_fadd_fpext_nxv1f32_nxv1f64(double %s, <vscale x 1 x float> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
582579
; CHECK-LABEL: vpreduce_ord_fadd_fpext_nxv1f32_nxv1f64:
583580
; CHECK: # %bb.0:
584-
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
585-
; CHECK-NEXT: vfwcvt.f.f.v v9, v8
586581
; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
587-
; CHECK-NEXT: vfmv.s.f v8, fa0
588-
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
589-
; CHECK-NEXT: vfredosum.vs v8, v9, v8, v0.t
590-
; CHECK-NEXT: vfmv.f.s fa0, v8
582+
; CHECK-NEXT: vfmv.s.f v9, fa0
583+
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
584+
; CHECK-NEXT: vfwredosum.vs v9, v8, v9, v0.t
585+
; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma
586+
; CHECK-NEXT: vfmv.f.s fa0, v9
591587
; CHECK-NEXT: ret
592588
%w = fpext <vscale x 1 x float> %v to <vscale x 1 x double>
593589
%r = call double @llvm.vp.reduce.fadd(double %s, <vscale x 1 x double> %w, <vscale x 1 x i1> %m, i32 %evl)

0 commit comments

Comments
 (0)