Skip to content

Commit b48e5f0

Browse files
[RISCV][VLOPT] Add Vector FP instructions to getOperandInfo (#121609)
Although we cannot reduce the VL of these instructions (i.e. add them to isSupported), we can add them to getOperandInfo to enable optimization where FP vector instructions are the users. Most of the instructions are covered by existing tests; I added tests for the narrowing conversions because I was a little unsure whether the dest or the source was 2*SEW and 2*LMUL.
1 parent a37dbc1 commit b48e5f0

File tree

8 files changed

+187
-120
lines changed

8 files changed

+187
-120
lines changed

llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp

Lines changed: 99 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -456,6 +456,49 @@ static OperandInfo getOperandInfo(const MachineOperand &MO,
456456
case RISCV::VCOMPRESS_VM:
457457
// Vector Element Index Instruction
458458
case RISCV::VID_V:
459+
// Vector Single-Width Floating-Point Add/Subtract Instructions
460+
case RISCV::VFADD_VF:
461+
case RISCV::VFADD_VV:
462+
case RISCV::VFSUB_VF:
463+
case RISCV::VFSUB_VV:
464+
case RISCV::VFRSUB_VF:
465+
// Vector Single-Width Floating-Point Multiply/Divide Instructions
466+
case RISCV::VFMUL_VF:
467+
case RISCV::VFMUL_VV:
468+
case RISCV::VFDIV_VF:
469+
case RISCV::VFDIV_VV:
470+
case RISCV::VFRDIV_VF:
471+
// Vector Floating-Point Square-Root Instruction
472+
case RISCV::VFSQRT_V:
473+
// Vector Floating-Point Reciprocal Square-Root Estimate Instruction
474+
case RISCV::VFRSQRT7_V:
475+
// Vector Floating-Point Reciprocal Estimate Instruction
476+
case RISCV::VFREC7_V:
477+
// Vector Floating-Point MIN/MAX Instructions
478+
case RISCV::VFMIN_VF:
479+
case RISCV::VFMIN_VV:
480+
case RISCV::VFMAX_VF:
481+
case RISCV::VFMAX_VV:
482+
// Vector Floating-Point Sign-Injection Instructions
483+
case RISCV::VFSGNJ_VF:
484+
case RISCV::VFSGNJ_VV:
485+
case RISCV::VFSGNJN_VV:
486+
case RISCV::VFSGNJN_VF:
487+
case RISCV::VFSGNJX_VF:
488+
case RISCV::VFSGNJX_VV:
489+
// Vector Floating-Point Classify Instruction
490+
case RISCV::VFCLASS_V:
491+
// Vector Floating-Point Move Instruction
492+
case RISCV::VFMV_V_F:
493+
// Single-Width Floating-Point/Integer Type-Convert Instructions
494+
case RISCV::VFCVT_XU_F_V:
495+
case RISCV::VFCVT_X_F_V:
496+
case RISCV::VFCVT_RTZ_XU_F_V:
497+
case RISCV::VFCVT_RTZ_X_F_V:
498+
case RISCV::VFCVT_F_XU_V:
499+
case RISCV::VFCVT_F_X_V:
500+
// Vector Floating-Point Merge Instruction
501+
case RISCV::VFMERGE_VFM:
459502
return OperandInfo(MIVLMul, MILog2SEW);
460503

461504
// Vector Widening Integer Add/Subtract
@@ -488,7 +531,33 @@ static OperandInfo getOperandInfo(const MachineOperand &MO,
488531
case RISCV::VWMACC_VX:
489532
case RISCV::VWMACCSU_VV:
490533
case RISCV::VWMACCSU_VX:
491-
case RISCV::VWMACCUS_VX: {
534+
case RISCV::VWMACCUS_VX:
535+
// Vector Widening Floating-Point Fused Multiply-Add Instructions
536+
case RISCV::VFWMACC_VF:
537+
case RISCV::VFWMACC_VV:
538+
case RISCV::VFWNMACC_VF:
539+
case RISCV::VFWNMACC_VV:
540+
case RISCV::VFWMSAC_VF:
541+
case RISCV::VFWMSAC_VV:
542+
case RISCV::VFWNMSAC_VF:
543+
case RISCV::VFWNMSAC_VV:
544+
// Vector Widening Floating-Point Add/Subtract Instructions
545+
// Dest EEW=2*SEW and EMUL=2*LMUL. Source EEW=SEW and EMUL=LMUL.
546+
case RISCV::VFWADD_VV:
547+
case RISCV::VFWADD_VF:
548+
case RISCV::VFWSUB_VV:
549+
case RISCV::VFWSUB_VF:
550+
// Vector Widening Floating-Point Multiply
551+
case RISCV::VFWMUL_VF:
552+
case RISCV::VFWMUL_VV:
553+
// Widening Floating-Point/Integer Type-Convert Instructions
554+
case RISCV::VFWCVT_XU_F_V:
555+
case RISCV::VFWCVT_X_F_V:
556+
case RISCV::VFWCVT_RTZ_XU_F_V:
557+
case RISCV::VFWCVT_RTZ_X_F_V:
558+
case RISCV::VFWCVT_F_XU_V:
559+
case RISCV::VFWCVT_F_X_V:
560+
case RISCV::VFWCVT_F_F_V: {
492561
unsigned Log2EEW = IsMODef ? MILog2SEW + 1 : MILog2SEW;
493562
RISCVII::VLMUL EMUL =
494563
IsMODef ? RISCVVType::twoTimesVLMUL(MIVLMul) : MIVLMul;
@@ -503,7 +572,12 @@ static OperandInfo getOperandInfo(const MachineOperand &MO,
503572
case RISCV::VWADD_WV:
504573
case RISCV::VWADD_WX:
505574
case RISCV::VWSUB_WV:
506-
case RISCV::VWSUB_WX: {
575+
case RISCV::VWSUB_WX:
576+
// Vector Widening Floating-Point Add/Subtract Instructions
577+
case RISCV::VFWADD_WF:
578+
case RISCV::VFWADD_WV:
579+
case RISCV::VFWSUB_WF:
580+
case RISCV::VFWSUB_WV: {
507581
bool IsOp1 = HasPassthru ? MO.getOperandNo() == 2 : MO.getOperandNo() == 1;
508582
bool TwoTimes = IsMODef || IsOp1;
509583
unsigned Log2EEW = TwoTimes ? MILog2SEW + 1 : MILog2SEW;
@@ -539,7 +613,16 @@ static OperandInfo getOperandInfo(const MachineOperand &MO,
539613
case RISCV::VNCLIPU_WX:
540614
case RISCV::VNCLIP_WI:
541615
case RISCV::VNCLIP_WV:
542-
case RISCV::VNCLIP_WX: {
616+
case RISCV::VNCLIP_WX:
617+
// Narrowing Floating-Point/Integer Type-Convert Instructions
618+
case RISCV::VFNCVT_XU_F_W:
619+
case RISCV::VFNCVT_X_F_W:
620+
case RISCV::VFNCVT_RTZ_XU_F_W:
621+
case RISCV::VFNCVT_RTZ_X_F_W:
622+
case RISCV::VFNCVT_F_XU_W:
623+
case RISCV::VFNCVT_F_X_W:
624+
case RISCV::VFNCVT_F_F_W:
625+
case RISCV::VFNCVT_ROD_F_F_W: {
543626
bool IsOp1 = HasPassthru ? MO.getOperandNo() == 2 : MO.getOperandNo() == 1;
544627
bool TwoTimes = IsOp1;
545628
unsigned Log2EEW = TwoTimes ? MILog2SEW + 1 : MILog2SEW;
@@ -615,7 +698,19 @@ static OperandInfo getOperandInfo(const MachineOperand &MO,
615698
case RISCV::VMADC_VI:
616699
case RISCV::VMADC_VX:
617700
case RISCV::VMSBC_VV:
618-
case RISCV::VMSBC_VX: {
701+
case RISCV::VMSBC_VX:
702+
// 13.13. Vector Floating-Point Compare Instructions
703+
// Dest EEW=1 and EMUL=(EEW/SEW)*LMUL. Source EEW=SEW EMUL=LMUL.
704+
case RISCV::VMFEQ_VF:
705+
case RISCV::VMFEQ_VV:
706+
case RISCV::VMFNE_VF:
707+
case RISCV::VMFNE_VV:
708+
case RISCV::VMFLT_VF:
709+
case RISCV::VMFLT_VV:
710+
case RISCV::VMFLE_VF:
711+
case RISCV::VMFLE_VV:
712+
case RISCV::VMFGT_VF:
713+
case RISCV::VMFGE_VF: {
619714
if (IsMODef)
620715
return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(0, MI), 0);
621716
return OperandInfo(MIVLMul, MILog2SEW);

llvm/test/CodeGen/RISCV/rvv/vfwmacc-vp.ll

Lines changed: 14 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -143,9 +143,8 @@ define <vscale x 1 x float> @vfmacc_vf_nxv1f32(<vscale x 1 x half> %va, half %b,
143143
; ZVFHMIN-LABEL: vfmacc_vf_nxv1f32:
144144
; ZVFHMIN: # %bb.0:
145145
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
146-
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
147-
; ZVFHMIN-NEXT: vmv.v.x v10, a1
148146
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
147+
; ZVFHMIN-NEXT: vmv.v.x v10, a1
149148
; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8, v0.t
150149
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10, v0.t
151150
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
@@ -170,9 +169,8 @@ define <vscale x 1 x float> @vfmacc_vf_nxv1f32_commute(<vscale x 1 x half> %va,
170169
; ZVFHMIN-LABEL: vfmacc_vf_nxv1f32_commute:
171170
; ZVFHMIN: # %bb.0:
172171
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
173-
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
174-
; ZVFHMIN-NEXT: vmv.v.x v11, a1
175172
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
173+
; ZVFHMIN-NEXT: vmv.v.x v11, a1
176174
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t
177175
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v11, v0.t
178176
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
@@ -198,9 +196,8 @@ define <vscale x 1 x float> @vfmacc_vf_nxv1f32_unmasked(<vscale x 1 x half> %va,
198196
; ZVFHMIN-LABEL: vfmacc_vf_nxv1f32_unmasked:
199197
; ZVFHMIN: # %bb.0:
200198
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
201-
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
202-
; ZVFHMIN-NEXT: vmv.v.x v10, a1
203199
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
200+
; ZVFHMIN-NEXT: vmv.v.x v10, a1
204201
; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8
205202
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
206203
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
@@ -225,9 +222,8 @@ define <vscale x 1 x float> @vfmacc_vf_nxv1f32_tu(<vscale x 1 x half> %va, half
225222
; ZVFHMIN-LABEL: vfmacc_vf_nxv1f32_tu:
226223
; ZVFHMIN: # %bb.0:
227224
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
228-
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
229-
; ZVFHMIN-NEXT: vmv.v.x v10, a1
230225
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
226+
; ZVFHMIN-NEXT: vmv.v.x v10, a1
231227
; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8
232228
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
233229
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, tu, mu
@@ -254,9 +250,8 @@ define <vscale x 1 x float> @vfmacc_vf_nxv1f32_commute_tu(<vscale x 1 x half> %v
254250
; ZVFHMIN-LABEL: vfmacc_vf_nxv1f32_commute_tu:
255251
; ZVFHMIN: # %bb.0:
256252
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
257-
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
258-
; ZVFHMIN-NEXT: vmv.v.x v10, a1
259253
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
254+
; ZVFHMIN-NEXT: vmv.v.x v10, a1
260255
; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8
261256
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
262257
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, tu, mu
@@ -283,9 +278,8 @@ define <vscale x 1 x float> @vfmacc_vf_nxv1f32_unmasked_tu(<vscale x 1 x half> %
283278
; ZVFHMIN-LABEL: vfmacc_vf_nxv1f32_unmasked_tu:
284279
; ZVFHMIN: # %bb.0:
285280
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
286-
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
287-
; ZVFHMIN-NEXT: vmv.v.x v10, a1
288281
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
282+
; ZVFHMIN-NEXT: vmv.v.x v10, a1
289283
; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8
290284
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
291285
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, tu, ma
@@ -362,9 +356,8 @@ define <vscale x 2 x float> @vfmacc_vf_nxv2f32(<vscale x 2 x half> %va, half %b,
362356
; ZVFHMIN-LABEL: vfmacc_vf_nxv2f32:
363357
; ZVFHMIN: # %bb.0:
364358
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
365-
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
366-
; ZVFHMIN-NEXT: vmv.v.x v10, a1
367359
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
360+
; ZVFHMIN-NEXT: vmv.v.x v10, a1
368361
; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8, v0.t
369362
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10, v0.t
370363
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
@@ -389,9 +382,8 @@ define <vscale x 2 x float> @vfmacc_vf_nxv2f32_unmasked(<vscale x 2 x half> %va,
389382
; ZVFHMIN-LABEL: vfmacc_vf_nxv2f32_unmasked:
390383
; ZVFHMIN: # %bb.0:
391384
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
392-
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
393-
; ZVFHMIN-NEXT: vmv.v.x v10, a1
394385
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
386+
; ZVFHMIN-NEXT: vmv.v.x v10, a1
395387
; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8
396388
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
397389
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
@@ -468,9 +460,8 @@ define <vscale x 4 x float> @vfmacc_vf_nxv4f32(<vscale x 4 x half> %va, half %b,
468460
; ZVFHMIN-LABEL: vfmacc_vf_nxv4f32:
469461
; ZVFHMIN: # %bb.0:
470462
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
471-
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m1, ta, ma
472-
; ZVFHMIN-NEXT: vmv.v.x v12, a1
473463
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
464+
; ZVFHMIN-NEXT: vmv.v.x v12, a1
474465
; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8, v0.t
475466
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12, v0.t
476467
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
@@ -495,9 +486,8 @@ define <vscale x 4 x float> @vfmacc_vf_nxv4f32_unmasked(<vscale x 4 x half> %va,
495486
; ZVFHMIN-LABEL: vfmacc_vf_nxv4f32_unmasked:
496487
; ZVFHMIN: # %bb.0:
497488
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
498-
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m1, ta, ma
499-
; ZVFHMIN-NEXT: vmv.v.x v12, a1
500489
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
490+
; ZVFHMIN-NEXT: vmv.v.x v12, a1
501491
; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8
502492
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12
503493
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
@@ -574,9 +564,8 @@ define <vscale x 8 x float> @vfmacc_vf_nxv8f32(<vscale x 8 x half> %va, half %b,
574564
; ZVFHMIN-LABEL: vfmacc_vf_nxv8f32:
575565
; ZVFHMIN: # %bb.0:
576566
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
577-
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m2, ta, ma
578-
; ZVFHMIN-NEXT: vmv.v.x v16, a1
579567
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
568+
; ZVFHMIN-NEXT: vmv.v.x v16, a1
580569
; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8, v0.t
581570
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16, v0.t
582571
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
@@ -601,9 +590,8 @@ define <vscale x 8 x float> @vfmacc_vf_nxv8f32_unmasked(<vscale x 8 x half> %va,
601590
; ZVFHMIN-LABEL: vfmacc_vf_nxv8f32_unmasked:
602591
; ZVFHMIN: # %bb.0:
603592
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
604-
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m2, ta, ma
605-
; ZVFHMIN-NEXT: vmv.v.x v16, a1
606593
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
594+
; ZVFHMIN-NEXT: vmv.v.x v16, a1
607595
; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8
608596
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
609597
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
@@ -696,9 +684,8 @@ define <vscale x 16 x float> @vfmacc_vf_nxv16f32(<vscale x 16 x half> %va, half
696684
; ZVFHMIN-LABEL: vfmacc_vf_nxv16f32:
697685
; ZVFHMIN: # %bb.0:
698686
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
699-
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
700-
; ZVFHMIN-NEXT: vmv.v.x v4, a1
701687
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
688+
; ZVFHMIN-NEXT: vmv.v.x v4, a1
702689
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8, v0.t
703690
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v4, v0.t
704691
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
@@ -723,9 +710,8 @@ define <vscale x 16 x float> @vfmacc_vf_nxv16f32_unmasked(<vscale x 16 x half> %
723710
; ZVFHMIN-LABEL: vfmacc_vf_nxv16f32_unmasked:
724711
; ZVFHMIN: # %bb.0:
725712
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
726-
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
727-
; ZVFHMIN-NEXT: vmv.v.x v24, a1
728713
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
714+
; ZVFHMIN-NEXT: vmv.v.x v24, a1
729715
; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v8
730716
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24
731717
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma

0 commit comments

Comments
 (0)