[VP][RISCV] Introduce vp.lrint/llrint and RISC-V support. #82627
Conversation
@llvm/pr-subscribers-llvm-selectiondag @llvm/pr-subscribers-llvm-ir

Author: Yeting Kuo (yetingk)

Changes

RISC-V implements vector lrint/llrint by vfcvt.x.f.v.

Patch is 69.95 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/82627.diff

9 Files Affected:
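As a quick illustration before the diff: the new intrinsics follow the usual VP convention of a source vector, a mask, and an explicit vector length. A minimal IR sketch (the function name and types below are illustrative, not taken from the patch):

declare <vscale x 4 x i32> @llvm.vp.lrint.nxv4i32.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, i32)

; Rounds each enabled lane of %x to the nearest integer using the current
; rounding mode; disabled lanes (mask false or index >= %evl) yield poison.
define <vscale x 4 x i32> @example_vp_lrint(<vscale x 4 x float> %x, <vscale x 4 x i1> %m, i32 zeroext %evl) {
  %r = call <vscale x 4 x i32> @llvm.vp.lrint.nxv4i32.nxv4f32(<vscale x 4 x float> %x, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x i32> %r
}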
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index fd2e3aacd0169c..5385057ba657c0 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -15990,6 +15990,8 @@ functions would, but without setting errno. If the rounded value is
too large to be stored in the result type, the return value is a
non-deterministic value (equivalent to `freeze poison`).
+.. _int_lrint:
+
'``llvm.lrint.*``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -16035,6 +16037,8 @@ would, but without setting errno. If the rounded value is too large to
be stored in the result type, the return value is a non-deterministic
value (equivalent to `freeze poison`).
+.. _int_llrint:
+
'``llvm.llrint.*``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -23344,6 +23348,101 @@ Examples:
%t = call <4 x float> @llvm.trunc.v4f32(<4 x float> %a)
%also.r = select <4 x i1> %mask, <4 x float> %t, <4 x float> poison
+.. _int_vp_lrint:
+
+'``llvm.vp.lrint.*``' Intrinsics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+This is an overloaded intrinsic.
+
+::
+
+ declare <16 x i32> @llvm.vp.lrint.v16i32.v16f32 (<16 x float> <op>, <16 x i1> <mask>, i32 <vector_length>)
+ declare <vscale x 4 x i32> @llvm.vp.lrint.nxv4i32.nxv4f32 (<vscale x 4 x float> <op>, <vscale x 4 x i1> <mask>, i32 <vector_length>)
+ declare <256 x i64> @llvm.vp.lrint.v256i64.v256f64 (<256 x double> <op>, <256 x i1> <mask>, i32 <vector_length>)
+
+Overview:
+"""""""""
+
+Predicated lrint of a vector of floating-point values.
+
+
+Arguments:
+""""""""""
+
+The result is an integer vector and the first operand is a float vector with
+the same number of elements as the result vector type. The second operand is the
+vector mask and has the same number of elements as the result vector type. The
+third operand is the explicit vector length of the operation.
+
+Semantics:
+""""""""""
+
+The '``llvm.vp.lrint``' intrinsic performs lrint (:ref:`lrint <int_lrint>`) of
+the first vector operand on each enabled lane. The result on disabled lanes is a
+:ref:`poison value <poisonvalues>`.
+
+Examples:
+"""""""""
+
+.. code-block:: llvm
+
+ %r = call <4 x i32> @llvm.vp.lrint.v4i32.v4f32(<4 x float> %a, <4 x i1> %mask, i32 %evl)
+ ;; For all lanes below %evl, %r is lane-wise equivalent to %also.r
+
+ %t = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> %a)
+ %also.r = select <4 x i1> %mask, <4 x i32> %t, <4 x i32> poison
+
+.. _int_vp_llrint:
+
+'``llvm.vp.llrint.*``' Intrinsics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+This is an overloaded intrinsic.
+
+::
+
+ declare <16 x i32> @llvm.vp.llrint.v16i32.v16f32 (<16 x float> <op>, <16 x i1> <mask>, i32 <vector_length>)
+ declare <vscale x 4 x i32> @llvm.vp.llrint.nxv4i32.nxv4f32 (<vscale x 4 x float> <op>, <vscale x 4 x i1> <mask>, i32 <vector_length>)
+ declare <256 x i64> @llvm.vp.llrint.v256i64.v256f64 (<256 x double> <op>, <256 x i1> <mask>, i32 <vector_length>)
+
+Overview:
+"""""""""
+
+Predicated llrint of a vector of floating-point values.
+
+
+Arguments:
+""""""""""
+
+The result is an integer vector and the first operand is a float vector with
+the same number of elements as the result vector type. The second operand is the
+vector mask and has the same number of elements as the result vector type. The
+third operand is the explicit vector length of the operation.
+
+Semantics:
+""""""""""
+
+The '``llvm.vp.llrint``' intrinsic performs llrint (:ref:`llrint <int_llrint>`) of
+the first vector operand on each enabled lane. The result on disabled lanes is a
+:ref:`poison value <poisonvalues>`.
+
+Examples:
+"""""""""
+
+.. code-block:: llvm
+
+ %r = call <4 x i32> @llvm.vp.llrint.v4i32.v4f32(<4 x float> %a, <4 x i1> %mask, i32 %evl)
+ ;; For all lanes below %evl, %r is lane-wise equivalent to %also.r
+
+ %t = call <4 x i32> @llvm.llrint.v4i32.v4f32(<4 x float> %a)
+ %also.r = select <4 x i1> %mask, <4 x i32> %t, <4 x i32> poison
+
+
.. _int_vp_bitreverse:
'``llvm.vp.bitreverse.*``' Intrinsics
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 4becdd71cd440d..ab91abacac6a13 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -2036,6 +2036,14 @@ let IntrProperties = [IntrNoMem, IntrNoSync, IntrWillReturn] in {
[ LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;
+ def int_vp_lrint : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
+ [ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
+ def int_vp_llrint : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
+ [ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
// Casts
def int_vp_trunc : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
diff --git a/llvm/include/llvm/IR/VPIntrinsics.def b/llvm/include/llvm/IR/VPIntrinsics.def
index 3b32b60609f536..c320f768ce6d82 100644
--- a/llvm/include/llvm/IR/VPIntrinsics.def
+++ b/llvm/include/llvm/IR/VPIntrinsics.def
@@ -437,6 +437,18 @@ VP_PROPERTY_FUNCTIONAL_INTRINSIC(nearbyint)
VP_PROPERTY_FUNCTIONAL_SDOPC(FNEARBYINT)
END_REGISTER_VP(vp_nearbyint, VP_FNEARBYINT)
+// llvm.vp.lrint(x,mask,vlen)
+BEGIN_REGISTER_VP(vp_lrint, 1, 2, VP_LRINT, 0)
+VP_PROPERTY_FUNCTIONAL_INTRINSIC(lrint)
+VP_PROPERTY_FUNCTIONAL_SDOPC(LRINT)
+END_REGISTER_VP(vp_lrint, VP_LRINT)
+
+// llvm.vp.llrint(x,mask,vlen)
+BEGIN_REGISTER_VP(vp_llrint, 1, 2, VP_LLRINT, 0)
+VP_PROPERTY_FUNCTIONAL_INTRINSIC(llrint)
+VP_PROPERTY_FUNCTIONAL_SDOPC(LLRINT)
+END_REGISTER_VP(vp_llrint, VP_LLRINT)
+
///// } Floating-Point Arithmetic
///// Type Casts {
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 7fc252600534f0..87a5ab9cfecb0d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -1102,7 +1102,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FRINT:
case ISD::VP_FRINT:
case ISD::LRINT:
+ case ISD::VP_LRINT:
case ISD::LLRINT:
+ case ISD::VP_LLRINT:
case ISD::FROUND:
case ISD::VP_FROUND:
case ISD::FROUNDEVEN:
@@ -4263,6 +4265,8 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::LRINT:
case ISD::LLRINT:
+ case ISD::VP_LRINT:
+ case ISD::VP_LLRINT:
Res = WidenVecRes_XRINT(N);
break;
@@ -4869,7 +4873,15 @@ SDValue DAGTypeLegalizer::WidenVecRes_XRINT(SDNode *N) {
if (WidenNumElts != SrcVT.getVectorElementCount())
return DAG.UnrollVectorOp(N, WidenNumElts.getKnownMinValue());
- return DAG.getNode(N->getOpcode(), dl, WidenVT, Src);
+ if (N->getNumOperands() == 1)
+ return DAG.getNode(N->getOpcode(), dl, WidenVT, Src);
+
+ assert(N->getNumOperands() == 3 && "Unexpected number of operands!");
+ assert(N->isVPOpcode() && "Expected VP opcode");
+
+ SDValue Mask =
+ GetWidenedMask(N->getOperand(1), WidenVT.getVectorElementCount());
+ return DAG.getNode(N->getOpcode(), dl, WidenVT, Src, Mask, N->getOperand(2));
}
SDValue DAGTypeLegalizer::WidenVecRes_Convert_StrictFP(SDNode *N) {
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 4f809ccf7b3019..e6578eeea2c9fe 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -699,7 +699,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND,
ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO,
ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS,
- ISD::VP_FMINIMUM, ISD::VP_FMAXIMUM, ISD::EXPERIMENTAL_VP_REVERSE,
+ ISD::VP_FMINIMUM, ISD::VP_FMAXIMUM, ISD::VP_LRINT,
+ ISD::VP_LLRINT, ISD::EXPERIMENTAL_VP_REVERSE,
ISD::EXPERIMENTAL_VP_SPLICE};
static const unsigned IntegerVecReduceOps[] = {
@@ -5690,6 +5691,9 @@ static unsigned getRISCVVLOp(SDValue Op) {
switch (Op.getOpcode()) {
default:
llvm_unreachable("don't have RISC-V specified VL op for this SDNode");
+ case ISD::VP_LRINT:
+ case ISD::VP_LLRINT:
+ return RISCVISD::VFCVT_X_F_VL;
OP_CASE(ADD)
OP_CASE(SUB)
OP_CASE(MUL)
@@ -6793,6 +6797,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
case ISD::VP_UDIV:
case ISD::VP_SREM:
case ISD::VP_UREM:
+ case ISD::VP_LRINT:
+ case ISD::VP_LLRINT:
return lowerVPOp(Op, DAG);
case ISD::VP_AND:
case ISD::VP_OR:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint-vp.ll
new file mode 100644
index 00000000000000..119d0d75497c43
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint-vp.ll
@@ -0,0 +1,987 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+v,+f,+d -target-abi=ilp32d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v,+f,+d -target-abi=lp64d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV64
+
+define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x, <1 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: llrint_v1i64_v1f32:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrintf
+; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; RV32-NEXT: vmv.v.x v8, a0
+; RV32-NEXT: vslide1down.vx v8, v8, a1
+; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: llrint_v1i64_v1f32:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; RV64-NEXT: vfmv.f.s fa5, v8
+; RV64-NEXT: fcvt.l.s a0, fa5
+; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vmv.s.x v8, a0
+; RV64-NEXT: ret
+ %a = call <1 x i64> @llvm.vp.llrint.v1i64.v1f32(<1 x float> %x, <1 x i1> %m, i32 %evl)
+ ret <1 x i64> %a
+}
+declare <1 x i64> @llvm.vp.llrint.v1i64.v1f32(<1 x float>, <1 x i1>, i32)
+
+define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x, <2 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: llrint_v2i64_v2f32:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -32
+; RV32-NEXT: .cfi_def_cfa_offset 32
+; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: sub sp, sp, a0
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 2 * vlenb
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrintf
+; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v8, a0
+; RV32-NEXT: vslide1down.vx v8, v8, a1
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vslidedown.vi v8, v8, 1
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrintf
+; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: csrr a2, vlenb
+; RV32-NEXT: add a2, sp, a2
+; RV32-NEXT: addi a2, a2, 16
+; RV32-NEXT: vl1r.v v8, (a2) # Unknown-size Folded Reload
+; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: vslide1down.vx v8, v8, a1
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add sp, sp, a0
+; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 32
+; RV32-NEXT: ret
+;
+; RV64-LABEL: llrint_v2i64_v2f32:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; RV64-NEXT: vslidedown.vi v9, v8, 1
+; RV64-NEXT: vfmv.f.s fa5, v9
+; RV64-NEXT: fcvt.l.s a0, fa5
+; RV64-NEXT: vfmv.f.s fa5, v8
+; RV64-NEXT: fcvt.l.s a1, fa5
+; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV64-NEXT: vmv.v.x v8, a1
+; RV64-NEXT: vslide1down.vx v8, v8, a0
+; RV64-NEXT: ret
+ %a = call <2 x i64> @llvm.vp.llrint.v2i64.v2f32(<2 x float> %x, <2 x i1> %m, i32 %evl)
+ ret <2 x i64> %a
+}
+declare <2 x i64> @llvm.vp.llrint.v2i64.v2f32(<2 x float>, <2 x i1>, i32)
+
+define <3 x i64> @llrint_v3i64_v3f32(<3 x float> %x, <3 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: llrint_v3i64_v3f32:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -32
+; RV32-NEXT: .cfi_def_cfa_offset 32
+; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: sub sp, sp, a0
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 4 * vlenb
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrintf
+; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v8, a0
+; RV32-NEXT: vslide1down.vx v8, v8, a1
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vslidedown.vi v8, v8, 1
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrintf
+; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT: addi a2, sp, 16
+; RV32-NEXT: vl2r.v v8, (a2) # Unknown-size Folded Reload
+; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: vslide1down.vx v8, v8, a1
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vslidedown.vi v8, v8, 2
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrintf
+; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT: addi a2, sp, 16
+; RV32-NEXT: vl2r.v v8, (a2) # Unknown-size Folded Reload
+; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: vslide1down.vx v8, v8, a1
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vslidedown.vi v8, v8, 3
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrintf
+; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT: addi a2, sp, 16
+; RV32-NEXT: vl2r.v v8, (a2) # Unknown-size Folded Reload
+; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: vslide1down.vx v8, v8, a1
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: add sp, sp, a0
+; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 32
+; RV32-NEXT: ret
+;
+; RV64-LABEL: llrint_v3i64_v3f32:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v9, v8, 1
+; RV64-NEXT: vfmv.f.s fa5, v9
+; RV64-NEXT: fcvt.l.s a0, fa5
+; RV64-NEXT: vfmv.f.s fa5, v8
+; RV64-NEXT: fcvt.l.s a1, fa5
+; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT: vmv.v.x v10, a1
+; RV64-NEXT: vslide1down.vx v10, v10, a0
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v9, v8, 2
+; RV64-NEXT: vfmv.f.s fa5, v9
+; RV64-NEXT: fcvt.l.s a0, fa5
+; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT: vslide1down.vx v10, v10, a0
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v8, v8, 3
+; RV64-NEXT: vfmv.f.s fa5, v8
+; RV64-NEXT: fcvt.l.s a0, fa5
+; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT: vslide1down.vx v8, v10, a0
+; RV64-NEXT: ret
+ %a = call <3 x i64> @llvm.vp.llrint.v3i64.v3f32(<3 x float> %x, <3 x i1> %m, i32 %evl)
+ ret <3 x i64> %a
+}
+declare <3 x i64> @llvm.vp.llrint.v3i64.v3f32(<3 x float>, <3 x i1>, i32)
+
+define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x, <4 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: llrint_v4i64_v4f32:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -32
+; RV32-NEXT: .cfi_def_cfa_offset 32
+; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: sub sp, sp, a0
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 4 * vlenb
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrintf
+; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v8, a0
+; RV32-NEXT: vslide1down.vx v8, v8, a1
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vslidedown.vi v8, v8, 1
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrintf
+; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT: addi a2, sp, 16
+; RV32-NEXT: vl2r.v v8, (a2) # Unknown-size Folded Reload
+; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: vslide1down.vx v8, v8, a1
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vslidedown.vi v8, v8, 2
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrintf
+; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT: addi a2, sp, 16
+; RV32-NEXT: vl2r.v v8, (a2) # Unknown-size Folded Reload
+; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: vslide1down.vx v8, v8, a1
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vslidedown.vi v8, v8, 3
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: call llrintf
+; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT: addi a2, sp, 16
+; RV32-NEXT: vl2r.v v8, (a2) # Unknown-size Folded Reload
+; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: vslide1down.vx v8, v8, a1
+; RV32-NEXT: csrr a0, vlenb
+;...
[truncated]
You can test this locally with the following command:

git-clang-format --diff 9ca70d72f4f217ff4f6ab337ad4a8e6666860791 f42fbdceeb2819f6df5f7c29e751115f21f856e3 -- llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp llvm/lib/IR/IntrinsicInst.cpp llvm/lib/IR/Verifier.cpp llvm/lib/Target/RISCV/RISCVISelLowering.cpp llvm/unittests/IR/VPIntrinsicTest.cpp

View the diff from clang-format here.

diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 4f321bc516..d783feb267 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -6187,7 +6187,8 @@ void Verifier::visitVPIntrinsic(VPIntrinsic &VPI) {
case Intrinsic::vp_llrint:
Check(
RetTy->isIntOrIntVectorTy() && ValTy->isFPOrFPVectorTy(),
- "llvm.vp.fptoui, llvm.vp.fptosi, llvm.vp.lrint or llvm.vp.llrint" "intrinsic first argument element "
+ "llvm.vp.fptoui, llvm.vp.fptosi, llvm.vp.lrint or llvm.vp.llrint"
+ "intrinsic first argument element "
"type must be floating-point and result element type must be integer",
*VPCast);
break;
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 0c98642748..143878ff4b 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -695,20 +695,42 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::VP_SADDSAT, ISD::VP_UADDSAT, ISD::VP_SSUBSAT,
ISD::VP_USUBSAT};
- static const unsigned FloatingPointVPOps[] = {
- ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
- ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
- ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
- ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
- ISD::VP_SELECT, ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP,
- ISD::VP_SETCC, ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND,
- ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM,
- ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND,
- ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO,
- ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS,
- ISD::VP_FMINIMUM, ISD::VP_FMAXIMUM, ISD::VP_LRINT,
- ISD::VP_LLRINT, ISD::EXPERIMENTAL_VP_REVERSE,
- ISD::EXPERIMENTAL_VP_SPLICE};
+ static const unsigned FloatingPointVPOps[] = {ISD::VP_FADD,
+ ISD::VP_FSUB,
+ ISD::VP_FMUL,
+ ISD::VP_FDIV,
+ ISD::VP_FNEG,
+ ISD::VP_FABS,
+ ISD::VP_FMA,
+ ISD::VP_REDUCE_FADD,
+ ISD::VP_REDUCE_SEQ_FADD,
+ ISD::VP_REDUCE_FMIN,
+ ISD::VP_REDUCE_FMAX,
+ ISD::VP_MERGE,
+ ISD::VP_SELECT,
+ ISD::VP_SINT_TO_FP,
+ ISD::VP_UINT_TO_FP,
+ ISD::VP_SETCC,
+ ISD::VP_FP_ROUND,
+ ISD::VP_FP_EXTEND,
+ ISD::VP_SQRT,
+ ISD::VP_FMINNUM,
+ ISD::VP_FMAXNUM,
+ ISD::VP_FCEIL,
+ ISD::VP_FFLOOR,
+ ISD::VP_FROUND,
+ ISD::VP_FROUNDEVEN,
+ ISD::VP_FCOPYSIGN,
+ ISD::VP_FROUNDTOZERO,
+ ISD::VP_FRINT,
+ ISD::VP_FNEARBYINT,
+ ISD::VP_IS_FPCLASS,
+ ISD::VP_FMINIMUM,
+ ISD::VP_FMAXIMUM,
+ ISD::VP_LRINT,
+ ISD::VP_LLRINT,
+ ISD::EXPERIMENTAL_VP_REVERSE,
+ ISD::EXPERIMENTAL_VP_SPLICE};
static const unsigned IntegerVecReduceOps[] = {
ISD::VECREDUCE_ADD, ISD::VECREDUCE_AND, ISD::VECREDUCE_OR,
@@ -2036,6 +2036,14 @@ let IntrProperties = [IntrNoMem, IntrNoSync, IntrWillReturn] in {
[ LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;
def int_vp_lrint : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
Need a Verifier.cpp change to check the element counts like we do for the non-VP versions
Done.
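For reference, the element-type check this refers to is visible in the clang-format output above; a minimal sketch of the added Verifier.cpp case, assuming the RetTy, ValTy, and VPCast bindings already present in Verifier::visitVPIntrinsic for the vp.fptosi/vp.fptoui handling:

  // Inside Verifier::visitVPIntrinsic(), sharing the existing fp-to-int cast
  // handling: RetTy is the intrinsic's result type, ValTy its first argument type.
  case Intrinsic::vp_lrint:
  case Intrinsic::vp_llrint:
    Check(RetTy->isIntOrIntVectorTy() && ValTy->isFPOrFPVectorTy(),
          "llvm.vp.lrint or llvm.vp.llrint intrinsic first argument element "
          "type must be floating-point and result element type must be integer",
          *VPCast);
    break;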
LGTM