Skip to content

Commit c036a9a

Browse files
[RISCV][VLOPT] Add vector single width floating point add subtract instructions to isSupportedInstr
1 parent 031f33c commit c036a9a

File tree

3 files changed: +116 additions, -16 deletions (lines changed)

llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -983,6 +983,12 @@ static bool isSupportedInstr(const MachineInstr &MI) {
983983
case RISCV::VMSOF_M:
984984
case RISCV::VIOTA_M:
985985
case RISCV::VID_V:
986+
// Vector Single-Width Floating-Point Add/Subtract Instructions
987+
case RISCV::VFADD_VF:
988+
case RISCV::VFADD_VV:
989+
case RISCV::VFSUB_VF:
990+
case RISCV::VFSUB_VV:
991+
case RISCV::VFRSUB_VF:
986992
// Single-Width Floating-Point/Integer Type-Convert Instructions
987993
case RISCV::VFCVT_XU_F_V:
988994
case RISCV::VFCVT_X_F_V:

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll

Lines changed: 10 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -93,12 +93,11 @@ define void @fadd_v6f16(ptr %x, ptr %y) {
9393
; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
9494
; ZVFHMIN-NEXT: vle16.v v8, (a1)
9595
; ZVFHMIN-NEXT: vle16.v v9, (a0)
96-
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
9796
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
9897
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
9998
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
10099
; ZVFHMIN-NEXT: vfadd.vv v8, v12, v10
101-
; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
100+
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
102101
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
103102
; ZVFHMIN-NEXT: vse16.v v10, (a0)
104103
; ZVFHMIN-NEXT: ret
@@ -229,12 +228,11 @@ define void @fsub_v6f16(ptr %x, ptr %y) {
229228
; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
230229
; ZVFHMIN-NEXT: vle16.v v8, (a1)
231230
; ZVFHMIN-NEXT: vle16.v v9, (a0)
232-
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
233231
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
234232
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
235233
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
236234
; ZVFHMIN-NEXT: vfsub.vv v8, v12, v10
237-
; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
235+
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
238236
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
239237
; ZVFHMIN-NEXT: vse16.v v10, (a0)
240238
; ZVFHMIN-NEXT: ret
@@ -2330,13 +2328,12 @@ define void @fadd_vf_v6f16(ptr %x, half %y) {
23302328
; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
23312329
; ZVFHMIN-NEXT: vle16.v v8, (a0)
23322330
; ZVFHMIN-NEXT: fmv.x.w a1, fa0
2333-
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
23342331
; ZVFHMIN-NEXT: vmv.v.x v9, a1
23352332
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
23362333
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
23372334
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
23382335
; ZVFHMIN-NEXT: vfadd.vv v8, v10, v12
2339-
; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
2336+
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
23402337
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
23412338
; ZVFHMIN-NEXT: vse16.v v10, (a0)
23422339
; ZVFHMIN-NEXT: ret
@@ -2472,13 +2469,12 @@ define void @fadd_fv_v6f16(ptr %x, half %y) {
24722469
; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
24732470
; ZVFHMIN-NEXT: vle16.v v8, (a0)
24742471
; ZVFHMIN-NEXT: fmv.x.w a1, fa0
2475-
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
24762472
; ZVFHMIN-NEXT: vmv.v.x v9, a1
24772473
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
24782474
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
24792475
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
24802476
; ZVFHMIN-NEXT: vfadd.vv v8, v12, v10
2481-
; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
2477+
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
24822478
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
24832479
; ZVFHMIN-NEXT: vse16.v v10, (a0)
24842480
; ZVFHMIN-NEXT: ret
@@ -2614,13 +2610,12 @@ define void @fsub_vf_v6f16(ptr %x, half %y) {
26142610
; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
26152611
; ZVFHMIN-NEXT: vle16.v v8, (a0)
26162612
; ZVFHMIN-NEXT: fmv.x.w a1, fa0
2617-
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
26182613
; ZVFHMIN-NEXT: vmv.v.x v9, a1
26192614
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
26202615
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
26212616
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
26222617
; ZVFHMIN-NEXT: vfsub.vv v8, v10, v12
2623-
; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
2618+
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
26242619
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
26252620
; ZVFHMIN-NEXT: vse16.v v10, (a0)
26262621
; ZVFHMIN-NEXT: ret
@@ -2756,13 +2751,12 @@ define void @fsub_fv_v6f16(ptr %x, half %y) {
27562751
; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
27572752
; ZVFHMIN-NEXT: vle16.v v8, (a0)
27582753
; ZVFHMIN-NEXT: fmv.x.w a1, fa0
2759-
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
27602754
; ZVFHMIN-NEXT: vmv.v.x v9, a1
27612755
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
27622756
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
27632757
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
27642758
; ZVFHMIN-NEXT: vfsub.vv v8, v12, v10
2765-
; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
2759+
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
27662760
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
27672761
; ZVFHMIN-NEXT: vse16.v v10, (a0)
27682762
; ZVFHMIN-NEXT: ret
@@ -5004,13 +4998,13 @@ define void @fmuladd_v6f16(ptr %x, ptr %y, ptr %z) {
50044998
; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9
50054999
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
50065000
; ZVFHMIN-NEXT: vfmul.vv v8, v14, v12
5007-
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
5001+
; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
50085002
; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v8
50095003
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v11
50105004
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
50115005
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
50125006
; ZVFHMIN-NEXT: vfadd.vv v8, v8, v12
5013-
; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
5007+
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
50145008
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
50155009
; ZVFHMIN-NEXT: vse16.v v10, (a0)
50165010
; ZVFHMIN-NEXT: ret
@@ -5181,13 +5175,13 @@ define void @fmsub_fmuladd_v6f16(ptr %x, ptr %y, ptr %z) {
51815175
; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9
51825176
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
51835177
; ZVFHMIN-NEXT: vfmul.vv v8, v14, v12
5184-
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
5178+
; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
51855179
; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v8
51865180
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v11
51875181
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
51885182
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
51895183
; ZVFHMIN-NEXT: vfsub.vv v8, v8, v12
5190-
; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
5184+
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
51915185
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
51925186
; ZVFHMIN-NEXT: vse16.v v10, (a0)
51935187
; ZVFHMIN-NEXT: ret

llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll

Lines changed: 100 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2925,3 +2925,103 @@ define <vscale x 4 x i32> @vid.v(<vscale x 4 x i32> %c, iXLen %vl) {
29252925
%2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %c, iXLen %vl)
29262926
ret <vscale x 4 x i32> %2
29272927
}
2928+
2929+
define <vscale x 4 x float> @vfadd_vv(<vscale x 4 x float> %a, <vscale x 4 x float> %b, iXLen %vl) {
2930+
; NOVLOPT-LABEL: vfadd_vv:
2931+
; NOVLOPT: # %bb.0:
2932+
; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
2933+
; NOVLOPT-NEXT: vfadd.vv v8, v8, v10
2934+
; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
2935+
; NOVLOPT-NEXT: vfadd.vv v8, v8, v10
2936+
; NOVLOPT-NEXT: ret
2937+
;
2938+
; VLOPT-LABEL: vfadd_vv:
2939+
; VLOPT: # %bb.0:
2940+
; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
2941+
; VLOPT-NEXT: vfadd.vv v8, v8, v10
2942+
; VLOPT-NEXT: vfadd.vv v8, v8, v10
2943+
; VLOPT-NEXT: ret
2944+
%1 = call <vscale x 4 x float> @llvm.riscv.vfadd.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %a, <vscale x 4 x float> %b, iXLen 7, iXLen -1)
2945+
%2 = call <vscale x 4 x float> @llvm.riscv.vfadd.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %b, iXLen 7, iXLen %vl)
2946+
ret <vscale x 4 x float> %2
2947+
}
2948+
2949+
define <vscale x 4 x float> @vfadd_vx(<vscale x 4 x float> %a, float %b, iXLen %vl) {
2950+
; NOVLOPT-LABEL: vfadd_vx:
2951+
; NOVLOPT: # %bb.0:
2952+
; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
2953+
; NOVLOPT-NEXT: vfadd.vf v10, v8, fa0
2954+
; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
2955+
; NOVLOPT-NEXT: vfadd.vv v8, v10, v8
2956+
; NOVLOPT-NEXT: ret
2957+
;
2958+
; VLOPT-LABEL: vfadd_vx:
2959+
; VLOPT: # %bb.0:
2960+
; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
2961+
; VLOPT-NEXT: vfadd.vf v10, v8, fa0
2962+
; VLOPT-NEXT: vfadd.vv v8, v10, v8
2963+
; VLOPT-NEXT: ret
2964+
%1 = call <vscale x 4 x float> @llvm.riscv.vfadd.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %a, float %b, iXLen 7, iXLen -1)
2965+
%2 = call <vscale x 4 x float> @llvm.riscv.vfadd.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %a, iXLen 7, iXLen %vl)
2966+
ret <vscale x 4 x float> %2
2967+
}
2968+
2969+
define <vscale x 4 x float> @vfsub_vv(<vscale x 4 x float> %a, <vscale x 4 x float> %b, iXLen %vl) {
2970+
; NOVLOPT-LABEL: vfsub_vv:
2971+
; NOVLOPT: # %bb.0:
2972+
; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
2973+
; NOVLOPT-NEXT: vfsub.vv v8, v8, v10
2974+
; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
2975+
; NOVLOPT-NEXT: vfadd.vv v8, v8, v10
2976+
; NOVLOPT-NEXT: ret
2977+
;
2978+
; VLOPT-LABEL: vfsub_vv:
2979+
; VLOPT: # %bb.0:
2980+
; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
2981+
; VLOPT-NEXT: vfsub.vv v8, v8, v10
2982+
; VLOPT-NEXT: vfadd.vv v8, v8, v10
2983+
; VLOPT-NEXT: ret
2984+
%1 = call <vscale x 4 x float> @llvm.riscv.vfsub.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %a, <vscale x 4 x float> %b, iXLen 7, iXLen -1)
2985+
%2 = call <vscale x 4 x float> @llvm.riscv.vfadd.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %b, iXLen 7, iXLen %vl)
2986+
ret <vscale x 4 x float> %2
2987+
}
2988+
2989+
define <vscale x 4 x float> @vfsub_vx(<vscale x 4 x float> %a, float %b, iXLen %vl) {
2990+
; NOVLOPT-LABEL: vfsub_vx:
2991+
; NOVLOPT: # %bb.0:
2992+
; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
2993+
; NOVLOPT-NEXT: vfsub.vf v10, v8, fa0
2994+
; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
2995+
; NOVLOPT-NEXT: vfadd.vv v8, v10, v8
2996+
; NOVLOPT-NEXT: ret
2997+
;
2998+
; VLOPT-LABEL: vfsub_vx:
2999+
; VLOPT: # %bb.0:
3000+
; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
3001+
; VLOPT-NEXT: vfsub.vf v10, v8, fa0
3002+
; VLOPT-NEXT: vfadd.vv v8, v10, v8
3003+
; VLOPT-NEXT: ret
3004+
%1 = call <vscale x 4 x float> @llvm.riscv.vfsub.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %a, float %b, iXLen 7, iXLen -1)
3005+
%2 = call <vscale x 4 x float> @llvm.riscv.vfadd.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %a, iXLen 7, iXLen %vl)
3006+
ret <vscale x 4 x float> %2
3007+
}
3008+
3009+
define <vscale x 4 x float> @vfrsub_vx(<vscale x 4 x float> %a, float %b, iXLen %vl) {
3010+
; NOVLOPT-LABEL: vfrsub_vx:
3011+
; NOVLOPT: # %bb.0:
3012+
; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
3013+
; NOVLOPT-NEXT: vfrsub.vf v10, v8, fa0
3014+
; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
3015+
; NOVLOPT-NEXT: vfadd.vv v8, v10, v8
3016+
; NOVLOPT-NEXT: ret
3017+
;
3018+
; VLOPT-LABEL: vfrsub_vx:
3019+
; VLOPT: # %bb.0:
3020+
; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
3021+
; VLOPT-NEXT: vfrsub.vf v10, v8, fa0
3022+
; VLOPT-NEXT: vfadd.vv v8, v10, v8
3023+
; VLOPT-NEXT: ret
3024+
%1 = call <vscale x 4 x float> @llvm.riscv.vfrsub.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %a, float %b, iXLen 7, iXLen -1)
3025+
%2 = call <vscale x 4 x float> @llvm.riscv.vfadd.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %a, iXLen 7, iXLen %vl)
3026+
ret <vscale x 4 x float> %2
3027+
}

0 commit comments

Comments
 (0)