Skip to content

Commit fb39445

Browse files
authored
[RISCV][VLOPT] Add vfsqrt/vfrsqrt7 instructions to isSupportedInstr (#127462)
1 parent a44284c commit fb39445

File tree

4 files changed: 94 additions (+94) and 4 deletions (-4)

llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1092,6 +1092,10 @@ static bool isSupportedInstr(const MachineInstr &MI) {
10921092
case RISCV::VFWNMSAC_VF:
10931093
case RISCV::VFWMACCBF16_VV:
10941094
case RISCV::VFWMACCBF16_VF:
1095+
// Vector Floating-Point Square-Root Instruction
1096+
case RISCV::VFSQRT_V:
1097+
// Vector Floating-Point Reciprocal Square-Root Estimate Instruction
1098+
case RISCV::VFRSQRT7_V:
10951099
// Vector Floating-Point MIN/MAX Instructions
10961100
case RISCV::VFMIN_VF:
10971101
case RISCV::VFMIN_VV:

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1318,11 +1318,10 @@ define void @sqrt_v6bf16(ptr %x) {
13181318
; CHECK: # %bb.0:
13191319
; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
13201320
; CHECK-NEXT: vle16.v v8, (a0)
1321-
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
13221321
; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
13231322
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
13241323
; CHECK-NEXT: vfsqrt.v v8, v10
1325-
; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
1324+
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
13261325
; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
13271326
; CHECK-NEXT: vse16.v v10, (a0)
13281327
; CHECK-NEXT: ret
@@ -1371,11 +1370,10 @@ define void @sqrt_v6f16(ptr %x) {
13711370
; ZVFHMIN: # %bb.0:
13721371
; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
13731372
; ZVFHMIN-NEXT: vle16.v v8, (a0)
1374-
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
13751373
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
13761374
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
13771375
; ZVFHMIN-NEXT: vfsqrt.v v8, v10
1378-
; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
1376+
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
13791377
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
13801378
; ZVFHMIN-NEXT: vse16.v v10, (a0)
13811379
; ZVFHMIN-NEXT: ret

llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll

Lines changed: 48 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -5069,3 +5069,51 @@ define <vscale x 4 x float> @vfwmaccbf16_vf(<vscale x 4 x float> %a, bfloat %b,
50695069
%2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %d, iXLen 7, iXLen %vl)
50705070
ret <vscale x 4 x float> %2
50715071
}
5072+
5073+
define <vscale x 4 x double> @vfsqrt(<vscale x 4 x float> %a) {
5074+
; NOVLOPT-LABEL: vfsqrt:
5075+
; NOVLOPT: # %bb.0:
5076+
; NOVLOPT-NEXT: fsrmi a0, 0
5077+
; NOVLOPT-NEXT: vsetivli zero, 7, e32, m2, ta, ma
5078+
; NOVLOPT-NEXT: vfsqrt.v v10, v8
5079+
; NOVLOPT-NEXT: fsrm a0
5080+
; NOVLOPT-NEXT: vsetivli zero, 6, e32, m2, ta, ma
5081+
; NOVLOPT-NEXT: vfwmacc.vv v12, v8, v10
5082+
; NOVLOPT-NEXT: vmv4r.v v8, v12
5083+
; NOVLOPT-NEXT: ret
5084+
;
5085+
; VLOPT-LABEL: vfsqrt:
5086+
; VLOPT: # %bb.0:
5087+
; VLOPT-NEXT: fsrmi a0, 0
5088+
; VLOPT-NEXT: vsetivli zero, 6, e32, m2, ta, ma
5089+
; VLOPT-NEXT: vfsqrt.v v10, v8
5090+
; VLOPT-NEXT: fsrm a0
5091+
; VLOPT-NEXT: vfwmacc.vv v12, v8, v10
5092+
; VLOPT-NEXT: vmv4r.v v8, v12
5093+
; VLOPT-NEXT: ret
5094+
%1 = call <vscale x 4 x float> @llvm.riscv.vfsqrt.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %a, iXLen 0, iXLen 7)
5095+
%2 = call <vscale x 4 x double> @llvm.riscv.vfwmacc(<vscale x 4 x double> poison, <vscale x 4 x float> %a, <vscale x 4 x float> %1, iXLen 7, iXLen 6, iXLen 0)
5096+
ret <vscale x 4 x double> %2
5097+
}
5098+
5099+
define <vscale x 4 x double> @vfrsqrt7(<vscale x 4 x float> %a) {
5100+
; NOVLOPT-LABEL: vfrsqrt7:
5101+
; NOVLOPT: # %bb.0:
5102+
; NOVLOPT-NEXT: vsetivli zero, 7, e32, m2, ta, ma
5103+
; NOVLOPT-NEXT: vfrsqrt7.v v10, v8
5104+
; NOVLOPT-NEXT: vsetivli zero, 6, e32, m2, ta, ma
5105+
; NOVLOPT-NEXT: vfwmacc.vv v12, v8, v10
5106+
; NOVLOPT-NEXT: vmv4r.v v8, v12
5107+
; NOVLOPT-NEXT: ret
5108+
;
5109+
; VLOPT-LABEL: vfrsqrt7:
5110+
; VLOPT: # %bb.0:
5111+
; VLOPT-NEXT: vsetivli zero, 6, e32, m2, ta, ma
5112+
; VLOPT-NEXT: vfrsqrt7.v v10, v8
5113+
; VLOPT-NEXT: vfwmacc.vv v12, v8, v10
5114+
; VLOPT-NEXT: vmv4r.v v8, v12
5115+
; VLOPT-NEXT: ret
5116+
%1 = call <vscale x 4 x float> @llvm.riscv.vfrsqrt7.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %a, iXLen 7)
5117+
%2 = call <vscale x 4 x double> @llvm.riscv.vfwmacc(<vscale x 4 x double> poison, <vscale x 4 x float> %a, <vscale x 4 x float> %1, iXLen 7, iXLen 6, iXLen 0)
5118+
ret <vscale x 4 x double> %2
5119+
}

llvm/test/CodeGen/RISCV/rvv/vl-opt.mir

Lines changed: 40 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -141,6 +141,46 @@ body: |
141141
%y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 4 /* e16 */, 0
142142
...
143143
---
144+
name: vfsqrt_nofpexcept
145+
body: |
146+
bb.0:
147+
; CHECK-LABEL: name: vfsqrt_nofpexcept
148+
; CHECK: %x:vrm2 = nofpexcept PseudoVFSQRT_V_M2_E32 $noreg, $noreg, 7, 6, 5 /* e32 */, 3 /* ta, ma */, implicit $frm
149+
; CHECK-NEXT: early-clobber %y:vr = nofpexcept PseudoVFNCVTBF16_F_F_W_M1_E16 $noreg, %x, 7, 6, 4 /* e16 */, 3 /* ta, ma */, implicit $frm
150+
%x:vrm2 = nofpexcept PseudoVFSQRT_V_M2_E32 $noreg, $noreg, 7, 8, 5, 3, implicit $frm
151+
early-clobber %y:vr = nofpexcept PseudoVFNCVTBF16_F_F_W_M1_E16 $noreg, %x, 7, 6, 4, 3, implicit $frm
152+
...
153+
---
154+
name: vfsqrt_fpexcept
155+
body: |
156+
bb.0:
157+
; CHECK-LABEL: name: vfsqrt_fpexcept
158+
; CHECK: %x:vrm2 = PseudoVFSQRT_V_M2_E32 $noreg, $noreg, 7, 8, 5 /* e32 */, 3 /* ta, ma */, implicit $frm
159+
; CHECK-NEXT: early-clobber %y:vr = nofpexcept PseudoVFNCVTBF16_F_F_W_M1_E16 $noreg, %x, 7, 6, 4 /* e16 */, 3 /* ta, ma */, implicit $frm
160+
%x:vrm2 = PseudoVFSQRT_V_M2_E32 $noreg, $noreg, 7, 8, 5, 3, implicit $frm
161+
early-clobber %y:vr = nofpexcept PseudoVFNCVTBF16_F_F_W_M1_E16 $noreg, %x, 7, 6, 4, 3, implicit $frm
162+
...
163+
---
164+
name: vfrsqrt7_nofpexcept
165+
body: |
166+
bb.0:
167+
; CHECK-LABEL: name: vfrsqrt7_nofpexcept
168+
; CHECK: %x:vrm2 = nofpexcept PseudoVFRSQRT7_V_M2_E32 $noreg, $noreg, 1, 5 /* e32 */, 0 /* tu, mu */
169+
; CHECK-NEXT: %y:vrm2 = PseudoVADD_VV_M2 $noreg, %x, $noreg, 1, 5 /* e32 */, 0 /* tu, mu */
170+
%x:vrm2 = nofpexcept PseudoVFRSQRT7_V_M2_E32 $noreg, $noreg, 7, 5, 0
171+
%y:vrm2 = PseudoVADD_VV_M2 $noreg, %x, $noreg, 1, 5 /* e32 */, 0
172+
...
173+
---
174+
name: vfrsqrt7_fpexcept
175+
body: |
176+
bb.0:
177+
; CHECK-LABEL: name: vfrsqrt7_fpexcept
178+
; CHECK: %x:vrm2 = PseudoVFRSQRT7_V_M2_E32 $noreg, $noreg, 7, 5 /* e32 */, 0 /* tu, mu */
179+
; CHECK-NEXT: %y:vrm2 = PseudoVADD_VV_M2 $noreg, %x, $noreg, 1, 5 /* e32 */, 0 /* tu, mu */
180+
%x:vrm2 = PseudoVFRSQRT7_V_M2_E32 $noreg, $noreg, 7, 5, 0
181+
%y:vrm2 = PseudoVADD_VV_M2 $noreg, %x, $noreg, 1, 5 /* e32 */, 0
182+
...
183+
---
144184
name: vwadd_tied_vs1
145185
body: |
146186
bb.0:

0 commit comments

Comments
 (0)