Skip to content

Commit cf6bd3f

Browse files
committed
[LV][RISCV] Introduce llvm.vp.minimum/maximum intrinsics
Although there are predicated versions of minnum/maxnum, the ones for minimum/maximum are currently missing. This patch introduces these intrinsics and implements their lowering to RISC-V.
1 parent 16cd344 commit cf6bd3f

File tree

11 files changed

+3457
-8
lines changed

11 files changed

+3457
-8
lines changed

llvm/docs/LangRef.rst

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20378,6 +20378,106 @@ Examples:
2037820378
%also.r = select <4 x i1> %mask, <4 x float> %t, <4 x float> poison
2037920379

2038020380

20381+
.. _int_vp_minimum:
20382+
20383+
'``llvm.vp.minimum.*``' Intrinsics
20384+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
20385+
20386+
Syntax:
20387+
"""""""
20388+
This is an overloaded intrinsic.
20389+
20390+
::
20391+
20392+
declare <16 x float> @llvm.vp.minimum.v16f32 (<16 x float> <left_op>, <16 x float> <right_op>, <16 x i1> <mask>, i32 <vector_length>)
20393+
declare <vscale x 4 x float> @llvm.vp.minimum.nxv4f32 (<vscale x 4 x float> <left_op>, <vscale x 4 x float> <right_op>, <vscale x 4 x i1> <mask>, i32 <vector_length>)
20394+
declare <256 x double> @llvm.vp.minimum.v256f64 (<256 x double> <left_op>, <256 x double> <right_op>, <256 x i1> <mask>, i32 <vector_length>)
20395+
20396+
Overview:
20397+
"""""""""
20398+
20399+
Predicated floating-point minimum of two vectors of floating-point values,
20400+
propagating NaNs and treating -0.0 as less than +0.0.
20401+
20402+
Arguments:
20403+
""""""""""
20404+
20405+
The first two operands and the result have the same floating-point vector type. The
20406+
third operand is the vector mask and has the same number of elements as the
20407+
result vector type. The fourth operand is the explicit vector length of the
20408+
operation.
20409+
20410+
Semantics:
20411+
""""""""""
20412+
20413+
The '``llvm.vp.minimum``' intrinsic performs floating-point minimum (:ref:`minimum <i_minimum>`)
20414+
of the first and second vector operand on each enabled lane, the result being
20415+
NaN if either operand is a NaN. -0.0 is considered to be less than +0.0 for this
20416+
intrinsic. The result on disabled lanes is a :ref:`poison value <poisonvalues>`.
20417+
The operation is performed in the default floating-point environment.
20418+
20419+
Examples:
20420+
"""""""""
20421+
20422+
.. code-block:: llvm
20423+
20424+
%r = call <4 x float> @llvm.vp.minimum.v4f32(<4 x float> %a, <4 x float> %b, <4 x i1> %mask, i32 %evl)
20425+
;; For all lanes below %evl, %r is lane-wise equivalent to %also.r
20426+
20427+
%t = call <4 x float> @llvm.minimum.v4f32(<4 x float> %a, <4 x float> %b)
20428+
%also.r = select <4 x i1> %mask, <4 x float> %t, <4 x float> poison
20429+
20430+
20431+
.. _int_vp_maximum:
20432+
20433+
'``llvm.vp.maximum.*``' Intrinsics
20434+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
20435+
20436+
Syntax:
20437+
"""""""
20438+
This is an overloaded intrinsic.
20439+
20440+
::
20441+
20442+
declare <16 x float> @llvm.vp.maximum.v16f32 (<16 x float> <left_op>, <16 x float> <right_op>, <16 x i1> <mask>, i32 <vector_length>)
20443+
declare <vscale x 4 x float> @llvm.vp.maximum.nxv4f32 (<vscale x 4 x float> <left_op>, <vscale x 4 x float> <right_op>, <vscale x 4 x i1> <mask>, i32 <vector_length>)
20444+
declare <256 x double> @llvm.vp.maximum.v256f64 (<256 x double> <left_op>, <256 x double> <right_op>, <256 x i1> <mask>, i32 <vector_length>)
20445+
20446+
Overview:
20447+
"""""""""
20448+
20449+
Predicated floating-point maximum of two vectors of floating-point values,
20450+
propagating NaNs and treating -0.0 as less than +0.0.
20451+
20452+
Arguments:
20453+
""""""""""
20454+
20455+
The first two operands and the result have the same floating-point vector type. The
20456+
third operand is the vector mask and has the same number of elements as the
20457+
result vector type. The fourth operand is the explicit vector length of the
20458+
operation.
20459+
20460+
Semantics:
20461+
""""""""""
20462+
20463+
The '``llvm.vp.maximum``' intrinsic performs floating-point maximum (:ref:`maximum <i_maximum>`)
20464+
of the first and second vector operand on each enabled lane, the result being
20465+
NaN if either operand is a NaN. -0.0 is considered to be less than +0.0 for this
20466+
intrinsic. The result on disabled lanes is a :ref:`poison value <poisonvalues>`.
20467+
The operation is performed in the default floating-point environment.
20468+
20469+
Examples:
20470+
"""""""""
20471+
20472+
.. code-block:: llvm
20473+
20474+
%r = call <4 x float> @llvm.vp.maximum.v4f32(<4 x float> %a, <4 x float> %b, <4 x i1> %mask, i32 %evl)
20475+
;; For all lanes below %evl, %r is lane-wise equivalent to %also.r
20476+
20477+
%t = call <4 x float> @llvm.maximum.v4f32(<4 x float> %a, <4 x float> %b)
20478+
%also.r = select <4 x i1> %mask, <4 x float> %t, <4 x float> poison
20479+
20480+
2038120481
.. _int_vp_fadd:
2038220482

2038320483
'``llvm.vp.fadd.*``' Intrinsics

llvm/include/llvm/IR/Intrinsics.td

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1991,6 +1991,16 @@ let IntrProperties = [IntrNoMem, IntrNoSync, IntrWillReturn] in {
19911991
LLVMMatchType<0>,
19921992
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
19931993
llvm_i32_ty]>;
1994+
def int_vp_minimum : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
1995+
[ LLVMMatchType<0>,
1996+
LLVMMatchType<0>,
1997+
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
1998+
llvm_i32_ty]>;
1999+
def int_vp_maximum : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
2000+
[ LLVMMatchType<0>,
2001+
LLVMMatchType<0>,
2002+
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
2003+
llvm_i32_ty]>;
19942004
def int_vp_copysign : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
19952005
[ LLVMMatchType<0>,
19962006
LLVMMatchType<0>,

llvm/include/llvm/IR/VPIntrinsics.def

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -367,20 +367,34 @@ VP_PROPERTY_FUNCTIONAL_SDOPC(FCOPYSIGN)
367367
VP_PROPERTY_FUNCTIONAL_INTRINSIC(copysign)
368368
END_REGISTER_VP(vp_copysign, VP_FCOPYSIGN)
369369

370-
// llvm.vp.minnum(x, y, mask,vlen)
370+
// llvm.vp.minnum(x,y,mask,vlen)
371371
BEGIN_REGISTER_VP(vp_minnum, 2, 3, VP_FMINNUM, -1)
372372
VP_PROPERTY_BINARYOP
373373
VP_PROPERTY_FUNCTIONAL_SDOPC(FMINNUM)
374374
VP_PROPERTY_FUNCTIONAL_INTRINSIC(minnum)
375375
END_REGISTER_VP(vp_minnum, VP_FMINNUM)
376376

377-
// llvm.vp.maxnum(x, y, mask,vlen)
377+
// llvm.vp.maxnum(x,y,mask,vlen)
378378
BEGIN_REGISTER_VP(vp_maxnum, 2, 3, VP_FMAXNUM, -1)
379379
VP_PROPERTY_BINARYOP
380380
VP_PROPERTY_FUNCTIONAL_SDOPC(FMAXNUM)
381381
VP_PROPERTY_FUNCTIONAL_INTRINSIC(maxnum)
382382
END_REGISTER_VP(vp_maxnum, VP_FMAXNUM)
383383

384+
// llvm.vp.minimum(x,y,mask,vlen)
385+
BEGIN_REGISTER_VP(vp_minimum, 2, 3, VP_FMINIMUM, -1)
386+
VP_PROPERTY_BINARYOP
387+
VP_PROPERTY_FUNCTIONAL_SDOPC(FMINIMUM)
388+
VP_PROPERTY_FUNCTIONAL_INTRINSIC(minimum)
389+
END_REGISTER_VP(vp_minimum, VP_FMINIMUM)
390+
391+
// llvm.vp.maximum(x,y,mask,vlen)
392+
BEGIN_REGISTER_VP(vp_maximum, 2, 3, VP_FMAXIMUM, -1)
393+
VP_PROPERTY_BINARYOP
394+
VP_PROPERTY_FUNCTIONAL_SDOPC(FMAXIMUM)
395+
VP_PROPERTY_FUNCTIONAL_INTRINSIC(maximum)
396+
END_REGISTER_VP(vp_maximum, VP_FMAXIMUM)
397+
384398
// llvm.vp.ceil(x,mask,vlen)
385399
BEGIN_REGISTER_VP(vp_ceil, 1, 2, VP_FCEIL, -1)
386400
VP_PROPERTY_FUNCTIONAL_INTRINSIC(ceil)

llvm/lib/CodeGen/ExpandVectorPredication.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -729,6 +729,8 @@ Value *CachingVPExpander::expandPredication(VPIntrinsic &VPI) {
729729
case Intrinsic::vp_sqrt:
730730
case Intrinsic::vp_maxnum:
731731
case Intrinsic::vp_minnum:
732+
case Intrinsic::vp_maximum:
733+
case Intrinsic::vp_minimum:
732734
return expandPredicationToFPCall(Builder, VPI,
733735
VPI.getFunctionalIntrinsicID().value());
734736
case Intrinsic::vp_load:

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1143,7 +1143,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
11431143
case ISD::FMINNUM: case ISD::VP_FMINNUM:
11441144
case ISD::FMAXNUM: case ISD::VP_FMAXNUM:
11451145
case ISD::FMINIMUM:
1146+
case ISD::VP_FMINIMUM:
11461147
case ISD::FMAXIMUM:
1148+
case ISD::VP_FMAXIMUM:
11471149
case ISD::SDIV: case ISD::VP_SDIV:
11481150
case ISD::UDIV: case ISD::VP_UDIV:
11491151
case ISD::FDIV: case ISD::VP_FDIV:
@@ -4131,7 +4133,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
41314133
case ISD::FMINNUM: case ISD::VP_FMINNUM:
41324134
case ISD::FMAXNUM: case ISD::VP_FMAXNUM:
41334135
case ISD::FMINIMUM:
4136+
case ISD::VP_FMINIMUM:
41344137
case ISD::FMAXIMUM:
4138+
case ISD::VP_FMAXIMUM:
41354139
case ISD::SMIN: case ISD::VP_SMIN:
41364140
case ISD::SMAX: case ISD::VP_SMAX:
41374141
case ISD::UMIN: case ISD::VP_UMIN:

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -688,7 +688,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
688688
ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND,
689689
ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO,
690690
ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS,
691-
ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE};
691+
ISD::VP_FMINIMUM, ISD::VP_FMAXIMUM, ISD::EXPERIMENTAL_VP_REVERSE,
692+
ISD::EXPERIMENTAL_VP_SPLICE};
692693

693694
static const unsigned IntegerVecReduceOps[] = {
694695
ISD::VECREDUCE_ADD, ISD::VECREDUCE_AND, ISD::VECREDUCE_OR,
@@ -927,7 +928,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
927928
ISD::VP_FMINNUM, ISD::VP_FMAXNUM, ISD::VP_FCEIL,
928929
ISD::VP_FFLOOR, ISD::VP_FROUND, ISD::VP_FROUNDEVEN,
929930
ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO, ISD::VP_FRINT,
930-
ISD::VP_FNEARBYINT, ISD::VP_SETCC};
931+
ISD::VP_FNEARBYINT, ISD::VP_SETCC, ISD::VP_FMINIMUM,
932+
ISD::VP_FMAXIMUM};
931933

932934
// Sets common operation actions on RVV floating-point vector types.
933935
const auto SetCommonVFPActions = [&](MVT VT) {
@@ -5401,7 +5403,16 @@ static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG,
54015403
Y = convertToScalableVector(ContainerVT, Y, DAG, Subtarget);
54025404
}
54035405

5404-
auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5406+
SDValue Mask, VL;
5407+
if (Op->isVPOpcode()) {
5408+
Mask = Op.getOperand(2);
5409+
if (VT.isFixedLengthVector())
5410+
Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
5411+
Subtarget);
5412+
VL = Op.getOperand(3);
5413+
} else {
5414+
std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5415+
}
54055416

54065417
SDValue NewY = Y;
54075418
if (!XIsNeverNan) {
@@ -5422,7 +5433,9 @@ static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG,
54225433
}
54235434

54245435
unsigned Opc =
5425-
Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::VFMAX_VL : RISCVISD::VFMIN_VL;
5436+
Op.getOpcode() == ISD::FMAXIMUM || Op->getOpcode() == ISD::VP_FMAXIMUM
5437+
? RISCVISD::VFMAX_VL
5438+
: RISCVISD::VFMIN_VL;
54265439
SDValue Res = DAG.getNode(Opc, DL, ContainerVT, NewX, NewY,
54275440
DAG.getUNDEF(ContainerVT), Mask, VL);
54285441
if (VT.isFixedLengthVector())
@@ -6647,6 +6660,13 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
66476660
!Subtarget.hasVInstructionsF16()))
66486661
return SplitVPOp(Op, DAG);
66496662
return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
6663+
case ISD::VP_FMAXIMUM:
6664+
case ISD::VP_FMINIMUM:
6665+
if (Op.getValueType() == MVT::nxv32f16 &&
6666+
(Subtarget.hasVInstructionsF16Minimal() &&
6667+
!Subtarget.hasVInstructionsF16()))
6668+
return SplitVPOp(Op, DAG);
6669+
return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
66506670
case ISD::EXPERIMENTAL_VP_SPLICE:
66516671
return lowerVPSpliceExperimental(Op, DAG);
66526672
case ISD::EXPERIMENTAL_VP_REVERSE:

0 commit comments

Comments
 (0)