[RISCV] Split fp rounding ops with zvfhmin nxv32f16 #108765
Conversation
This adds zvfhmin test coverage for fceil, ffloor, fnearbyint, frint, fround and froundeven and splits them at nxv32f16 to avoid crashing, similarly to what we do for other nodes that we promote. This also sets ftrunc to promote, which was previously missing. We already promote the VP version of it, vp_froundtozero.
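For context, a minimal sketch of the pattern this fixes, condensed from the zvfhmin tests below (the RUN line mirrors the ones added in the diff; it is illustrative rather than quoted verbatim): under zvfhmin without zvfh, these f16 rounding ops are promoted to f32, and at nxv32f16 the promoted type would be nxv32f32, which is wider than the largest legal RVV vector type, so the node is now split into two nxv16f16 halves first.

; Illustrative, condensed from the tests below:
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s
define <vscale x 32 x half> @ceil_nxv32f16(<vscale x 32 x half> %x) {
  ; Previously this crashed during lowering with only zvfhmin; it is now split
  ; into two nxv16f16 halves, each promoted to nxv16f32 and rounded there.
  %a = call <vscale x 32 x half> @llvm.ceil.nxv32f16(<vscale x 32 x half> %x)
  ret <vscale x 32 x half> %a
}
declare <vscale x 32 x half> @llvm.ceil.nxv32f16(<vscale x 32 x half>)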
@llvm/pr-subscribers-llvm-analysis @llvm/pr-subscribers-backend-risc-v

Author: Luke Lau (lukel97)

Changes

This adds zvfhmin test coverage for fceil, ffloor, fnearbyint, frint, fround and froundeven and splits them at nxv32f16 to avoid crashing, similarly to what we do for other nodes that we promote. This also sets ftrunc to promote, which was previously missing. We already promote the VP version of it, vp_froundtozero.

Patch is 95.90 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/108765.diff

8 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 6f2dc710cb3d4d..7b1b35160cf460 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -938,12 +938,13 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// TODO: support more ops.
static const unsigned ZvfhminPromoteOps[] = {
- ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD, ISD::FSUB,
- ISD::FMUL, ISD::FMA, ISD::FDIV, ISD::FSQRT,
- ISD::FCEIL, ISD::FFLOOR, ISD::FROUND, ISD::FROUNDEVEN,
- ISD::FRINT, ISD::FNEARBYINT, ISD::IS_FPCLASS, ISD::SETCC,
- ISD::FMAXIMUM, ISD::FMINIMUM, ISD::STRICT_FADD, ISD::STRICT_FSUB,
- ISD::STRICT_FMUL, ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA};
+ ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD, ISD::FSUB,
+ ISD::FMUL, ISD::FMA, ISD::FDIV, ISD::FSQRT,
+ ISD::FCEIL, ISD::FTRUNC, ISD::FFLOOR, ISD::FROUND,
+ ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT, ISD::IS_FPCLASS,
+ ISD::SETCC, ISD::FMAXIMUM, ISD::FMINIMUM, ISD::STRICT_FADD,
+ ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV, ISD::STRICT_FSQRT,
+ ISD::STRICT_FMA};
// TODO: support more vp ops.
static const unsigned ZvfhminPromoteVPOps[] = {ISD::VP_FADD,
@@ -6926,6 +6927,10 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
case ISD::FRINT:
case ISD::FROUND:
case ISD::FROUNDEVEN:
+ if (Op.getValueType() == MVT::nxv32f16 &&
+ (Subtarget.hasVInstructionsF16Minimal() &&
+ !Subtarget.hasVInstructionsF16()))
+ return SplitVectorOp(Op, DAG);
return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
case ISD::LRINT:
case ISD::LLRINT:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fceil-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fceil-sdnode.ll
index 9efc3183f15a52..111d1d8e07d3bf 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fceil-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fceil-sdnode.ll
@@ -1,124 +1,256 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN
define <vscale x 1 x half> @ceil_nxv1f16(<vscale x 1 x half> %x) {
-; CHECK-LABEL: ceil_nxv1f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI0_0)
-; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0)
-; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vfabs.v v9, v8
-; CHECK-NEXT: vmflt.vf v0, v9, fa5
-; CHECK-NEXT: fsrmi a0, 3
-; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT: fsrm a0
-; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT: ret
+; ZVFH-LABEL: ceil_nxv1f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, %hi(.LCPI0_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI0_0)(a0)
+; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFH-NEXT: vfabs.v v9, v8
+; ZVFH-NEXT: vmflt.vf v0, v9, fa5
+; ZVFH-NEXT: fsrmi a0, 3
+; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; ZVFH-NEXT: fsrm a0
+; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
+; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
+; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: ceil_nxv1f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v8, v9
+; ZVFHMIN-NEXT: lui a0, 307200
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
+; ZVFHMIN-NEXT: fsrmi a0, 3
+; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t
+; ZVFHMIN-NEXT: fsrm a0
+; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
+; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
%a = call <vscale x 1 x half> @llvm.ceil.nxv1f16(<vscale x 1 x half> %x)
ret <vscale x 1 x half> %a
}
declare <vscale x 1 x half> @llvm.ceil.nxv1f16(<vscale x 1 x half>)
define <vscale x 2 x half> @ceil_nxv2f16(<vscale x 2 x half> %x) {
-; CHECK-LABEL: ceil_nxv2f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI1_0)
-; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0)
-; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vfabs.v v9, v8
-; CHECK-NEXT: vmflt.vf v0, v9, fa5
-; CHECK-NEXT: fsrmi a0, 3
-; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT: fsrm a0
-; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT: ret
+; ZVFH-LABEL: ceil_nxv2f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, %hi(.LCPI1_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI1_0)(a0)
+; ZVFH-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFH-NEXT: vfabs.v v9, v8
+; ZVFH-NEXT: vmflt.vf v0, v9, fa5
+; ZVFH-NEXT: fsrmi a0, 3
+; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; ZVFH-NEXT: fsrm a0
+; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
+; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
+; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: ceil_nxv2f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v8, v9
+; ZVFHMIN-NEXT: lui a0, 307200
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
+; ZVFHMIN-NEXT: fsrmi a0, 3
+; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t
+; ZVFHMIN-NEXT: fsrm a0
+; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
%a = call <vscale x 2 x half> @llvm.ceil.nxv2f16(<vscale x 2 x half> %x)
ret <vscale x 2 x half> %a
}
declare <vscale x 2 x half> @llvm.ceil.nxv2f16(<vscale x 2 x half>)
define <vscale x 4 x half> @ceil_nxv4f16(<vscale x 4 x half> %x) {
-; CHECK-LABEL: ceil_nxv4f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI2_0)
-; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0)
-; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfabs.v v9, v8
-; CHECK-NEXT: vmflt.vf v0, v9, fa5
-; CHECK-NEXT: fsrmi a0, 3
-; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT: fsrm a0
-; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT: ret
+; ZVFH-LABEL: ceil_nxv4f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, %hi(.LCPI2_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI2_0)(a0)
+; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vfabs.v v9, v8
+; ZVFH-NEXT: vmflt.vf v0, v9, fa5
+; ZVFH-NEXT: fsrmi a0, 3
+; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; ZVFH-NEXT: fsrm a0
+; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
+; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
+; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: ceil_nxv4f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v8, v10
+; ZVFHMIN-NEXT: lui a0, 307200
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
+; ZVFHMIN-NEXT: fsrmi a0, 3
+; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t
+; ZVFHMIN-NEXT: fsrm a0
+; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT: ret
%a = call <vscale x 4 x half> @llvm.ceil.nxv4f16(<vscale x 4 x half> %x)
ret <vscale x 4 x half> %a
}
declare <vscale x 4 x half> @llvm.ceil.nxv4f16(<vscale x 4 x half>)
define <vscale x 8 x half> @ceil_nxv8f16(<vscale x 8 x half> %x) {
-; CHECK-LABEL: ceil_nxv8f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI3_0)
-; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0)
-; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT: vfabs.v v10, v8
-; CHECK-NEXT: vmflt.vf v0, v10, fa5
-; CHECK-NEXT: fsrmi a0, 3
-; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
-; CHECK-NEXT: fsrm a0
-; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
-; CHECK-NEXT: ret
+; ZVFH-LABEL: ceil_nxv8f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, %hi(.LCPI3_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI3_0)(a0)
+; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFH-NEXT: vfabs.v v10, v8
+; ZVFH-NEXT: vmflt.vf v0, v10, fa5
+; ZVFH-NEXT: fsrmi a0, 3
+; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t
+; ZVFH-NEXT: fsrm a0
+; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t
+; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu
+; ZVFH-NEXT: vfsgnj.vv v8, v10, v8, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: ceil_nxv8f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v8, v12
+; ZVFHMIN-NEXT: lui a0, 307200
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
+; ZVFHMIN-NEXT: fsrmi a0, 3
+; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t
+; ZVFHMIN-NEXT: fsrm a0
+; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu
+; ZVFHMIN-NEXT: vfsgnj.vv v12, v8, v12, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: ret
%a = call <vscale x 8 x half> @llvm.ceil.nxv8f16(<vscale x 8 x half> %x)
ret <vscale x 8 x half> %a
}
declare <vscale x 8 x half> @llvm.ceil.nxv8f16(<vscale x 8 x half>)
define <vscale x 16 x half> @ceil_nxv16f16(<vscale x 16 x half> %x) {
-; CHECK-LABEL: ceil_nxv16f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI4_0)
-; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0)
-; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfabs.v v12, v8
-; CHECK-NEXT: vmflt.vf v0, v12, fa5
-; CHECK-NEXT: fsrmi a0, 3
-; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
-; CHECK-NEXT: fsrm a0
-; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
-; CHECK-NEXT: ret
+; ZVFH-LABEL: ceil_nxv16f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, %hi(.LCPI4_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI4_0)(a0)
+; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vfabs.v v12, v8
+; ZVFH-NEXT: vmflt.vf v0, v12, fa5
+; ZVFH-NEXT: fsrmi a0, 3
+; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t
+; ZVFH-NEXT: fsrm a0
+; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t
+; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu
+; ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: ceil_nxv16f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v8, v16
+; ZVFHMIN-NEXT: lui a0, 307200
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
+; ZVFHMIN-NEXT: fsrmi a0, 3
+; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v16, v0.t
+; ZVFHMIN-NEXT: fsrm a0
+; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; ZVFHMIN-NEXT: vfsgnj.vv v16, v8, v16, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: ret
%a = call <vscale x 16 x half> @llvm.ceil.nxv16f16(<vscale x 16 x half> %x)
ret <vscale x 16 x half> %a
}
declare <vscale x 16 x half> @llvm.ceil.nxv16f16(<vscale x 16 x half>)
define <vscale x 32 x half> @ceil_nxv32f16(<vscale x 32 x half> %x) {
-; CHECK-LABEL: ceil_nxv32f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI5_0)
-; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0)
-; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-NEXT: vfabs.v v16, v8
-; CHECK-NEXT: vmflt.vf v0, v16, fa5
-; CHECK-NEXT: fsrmi a0, 3
-; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT: fsrm a0
-; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT: ret
+; ZVFH-LABEL: ceil_nxv32f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, %hi(.LCPI5_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI5_0)(a0)
+; ZVFH-NEXT: vsetvli a0, zero, e16, m8, ta, ma
+; ZVFH-NEXT: vfabs.v v16, v8
+; ZVFH-NEXT: vmflt.vf v0, v16, fa5
+; ZVFH-NEXT: fsrmi a0, 3
+; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t
+; ZVFH-NEXT: fsrm a0
+; ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t
+; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu
+; ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: ceil_nxv32f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v24, v16
+; ZVFHMIN-NEXT: lui a0, 307200
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5
+; ZVFHMIN-NEXT: fsrmi a0, 3
+; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t
+; ZVFHMIN-NEXT: fsrm a0
+; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v24, v16
+; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5
+; ZVFHMIN-NEXT: fsrmi a0, 3
+; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t
+; ZVFHMIN-NEXT: fsrm a0
+; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
+; ZVFHMIN-NEXT: ret
%a = call <vscale x 32 x half> @llvm.ceil.nxv32f16(<vscale x 32 x half> %x)
ret <vscale x 32 x half> %a
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/ffloor-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ffloor-sdnode.ll
index ec60b3ed3e0c88..97d84e91744038 100644
--- a/llvm/test/CodeGen/RISCV/rvv/ffloor-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/ffloor-sdnode.ll
@@ -1,124 +1,256 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN
define <vscale x 1 x half> @floor_nxv1f16(<vscale x 1 x half> %x) {
-; CHECK-LABEL: floor_nxv1f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI0_0)
-; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0)
-; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vfabs.v v9, v8
-; CHECK-NEXT: vmflt.vf v0, v9, fa5
-; CHECK-NEXT: fsrmi a0, 2
-; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT: fsrm a0
-; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT: ret
+; ZVFH-LABEL: floor_nxv1f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, %hi(.LCPI0_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI0_0)(a0)
+; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFH-NEXT: vfabs.v v9, v8
+; ZVFH-NEXT: vmflt.vf v0, v9, fa5
+; ZVFH-NEXT: fsrmi a0, 2
+; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; ZVFH-NEXT: fsrm a0
+; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
+; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
+; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: floor_nxv1f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v8, v9
+; ZVFHMIN-NEXT: lui a0, 307200
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
+; ZVFHMIN-NEXT: fsrmi a0, 2
+; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t
+; ZVFHMIN-NEXT: fsrm a0
+; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
+; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
%a = call <vscale x 1 x half> @llvm.floor.nxv1f16(<vscale x 1 x half> %x)
ret <vscale x 1 x half> %a
}
declare <vscale x 1 x half> @llvm.floor.nxv1f16(<vscale x 1 x half>)
define <vscale x 2 x half> @floor_nxv2f16(<vscale x 2 x half> %x) {
-; CHECK-LABEL: floor_nxv2f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI1_0)
-; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0)
-; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vfabs.v v9, v8
-; CHECK-NEXT: vmflt.vf v0, v9, fa5
-; CHECK-NEXT: fsrmi a0, 2
-; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT: fsrm a0
-; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT: ret
+; ZVFH-LABEL: floor_nxv2f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, %hi(.LCPI1_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI1_0)(a0)
+; ZVFH-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFH-NEXT: vfabs.v v9, v8
+; ZVFH-NEXT: vmflt.vf v0, v9, fa5
+; ZVFH-NEXT: fsrmi a0, 2
+; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; ZVFH-NEXT: fsrm a0
+; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
+; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
+; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: floor_nxv2f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v8, v9
+; ZVFHMIN-NEXT: lui a0, 307200
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
+; ZVFHMIN-NEXT: fsrmi a0, 2
+; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t
+; ZVFHMIN-NEXT: fsrm a0
+; ...
[truncated]
Marking ftrunc as promoted means these are no longer expanded
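To illustrate what that comment means in practice, here is a hedged sketch of the kind of cost-model check affected; the RUN line, function name, and wildcarded cost are assumptions for illustration, and the actual updated tests (presumably under llvm/test/Analysis/CostModel/RISCV) are only in the full, untruncated patch.

; Hedged sketch only; not taken from the patch:
; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 \
; RUN:   -mattr=+v,+d,+zfhmin,+zvfhmin %s | FileCheck %s
define void @trunc_scalable_f16() {
  ; With ISD::FTRUNC in ZvfhminPromoteOps this is costed as a promoted vector
  ; op rather than an expanded one, so the estimated cost reported here changes.
  ; CHECK: Cost Model: {{.*}} for instruction: %v = call
  %v = call <vscale x 4 x half> @llvm.trunc.nxv4f16(<vscale x 4 x half> undef)
  ret void
}
declare <vscale x 4 x half> @llvm.trunc.nxv4f16(<vscale x 4 x half>)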
LGTM
This adds zvfhmin test coverage for fceil, ffloor, fnearbyint, frint, fround and froundeven and splits them at nxv32f16 to avoid crashing, similarly to what we do for other nodes that we promote.
This also sets ftrunc to promote, which was previously missing. We already promote the VP version of it, vp_froundtozero.
Marking it as promoted affects some of the cost model tests since they're no longer expanded.